From 6793639e677012349fd1fee035c78dd67d43c1e5 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Fri, 20 Feb 2026 13:31:24 -0500 Subject: [PATCH 1/2] Fix #1641: Handle cleanup when exceptions are thrown --- cuda_bindings/cuda/bindings/driver.pyx.in | 6862 +++++++++++--------- cuda_bindings/cuda/bindings/nvrtc.pyx.in | 83 +- cuda_bindings/cuda/bindings/runtime.pyx.in | 5018 +++++++------- 3 files changed, 6532 insertions(+), 5431 deletions(-) diff --git a/cuda_bindings/cuda/bindings/driver.pyx.in b/cuda_bindings/cuda/bindings/driver.pyx.in index 5a964ce17d..4384b24684 100644 --- a/cuda_bindings/cuda/bindings/driver.pyx.in +++ b/cuda_bindings/cuda/bindings/driver.pyx.in @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -# This code was automatically generated with version 13.1.0, generator version fd3f910. Do not modify it directly. +# This code was automatically generated with version 13.1.0, generator version f251d07. Do not modify it directly. from typing import Any, Optional import cython import ctypes @@ -25622,8 +25622,9 @@ def cuGetErrorString(error not None : CUresult): -------- :py:obj:`~.CUresult`, :py:obj:`~.cudaGetErrorString` """ - cdef cydriver.CUresult cyerror = int(error) cdef const char* pStr = NULL + cdef cydriver.CUresult cyerror + cyerror = int(error) with nogil: err = cydriver.cuGetErrorString(cyerror, &pStr) if err != cydriver.CUDA_SUCCESS: @@ -25658,8 +25659,9 @@ def cuGetErrorName(error not None : CUresult): -------- :py:obj:`~.CUresult`, :py:obj:`~.cudaGetErrorName` """ - cdef cydriver.CUresult cyerror = int(error) cdef const char* pStr = NULL + cdef cydriver.CUresult cyerror + cyerror = int(error) with nogil: err = cydriver.cuGetErrorName(cyerror, &pStr) if err != cydriver.CUDA_SUCCESS: @@ -25751,7 +25753,8 @@ def cuDeviceGet(int ordinal): -------- :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetCount`, :py:obj:`~.cuDeviceGetName`, :py:obj:`~.cuDeviceGetUuid`, :py:obj:`~.cuDeviceGetLuid`, :py:obj:`~.cuDeviceTotalMem`, :py:obj:`~.cuDeviceGetExecAffinitySupport` """ - cdef CUdevice device = CUdevice() + cdef CUdevice device + device = CUdevice() with nogil: err = cydriver.cuDeviceGet(device._pvt_ptr, ordinal) if err != cydriver.CUDA_SUCCESS: @@ -25818,6 +25821,9 @@ def cuDeviceGetName(int length, dev): :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetUuid`, :py:obj:`~.cuDeviceGetLuid`, :py:obj:`~.cuDeviceGetCount`, :py:obj:`~.cuDeviceGet`, :py:obj:`~.cuDeviceTotalMem`, :py:obj:`~.cuDeviceGetExecAffinitySupport`, :py:obj:`~.cudaGetDeviceProperties` """ cdef cydriver.CUdevice cydev + cdef char * name + pyname = b" " * length + name = pyname if dev is None: pdev = 0 elif isinstance(dev, (CUdevice,)): @@ -25825,8 +25831,6 @@ def cuDeviceGetName(int length, dev): else: pdev = int(CUdevice(dev)) cydev = pdev - pyname = b" " * length - cdef char* name = pyname with nogil: err = cydriver.cuDeviceGetName(name, length, cydev) if err != cydriver.CUDA_SUCCESS: @@ -25861,6 +25865,8 @@ def cuDeviceGetUuid(dev): :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetCount`, :py:obj:`~.cuDeviceGetName`, :py:obj:`~.cuDeviceGetLuid`, :py:obj:`~.cuDeviceGet`, :py:obj:`~.cuDeviceTotalMem`, :py:obj:`~.cudaGetDeviceProperties` """ cdef cydriver.CUdevice cydev + cdef CUuuid uuid + uuid = CUuuid() if dev is None: pdev = 0 elif isinstance(dev, (CUdevice,)): @@ -25868,7 +25874,6 @@ def cuDeviceGetUuid(dev): else: pdev = int(CUdevice(dev)) cydev = pdev - cdef CUuuid uuid = CUuuid() with nogil: err = cydriver.cuDeviceGetUuid(uuid._pvt_ptr, cydev) if err != cydriver.CUDA_SUCCESS: @@ -25904,6 +25909,8 @@ def cuDeviceGetLuid(dev): :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetCount`, :py:obj:`~.cuDeviceGetName`, :py:obj:`~.cuDeviceGet`, :py:obj:`~.cuDeviceTotalMem`, :py:obj:`~.cuDeviceGetExecAffinitySupport`, :py:obj:`~.cudaGetDeviceProperties` """ cdef cydriver.CUdevice cydev + cdef unsigned int deviceNodeMask = 0 + cdef char luid[8] if dev is None: pdev = 0 elif isinstance(dev, (CUdevice,)): @@ -25911,8 +25918,6 @@ def cuDeviceGetLuid(dev): else: pdev = int(CUdevice(dev)) cydev = pdev - cdef char luid[8] - cdef unsigned int deviceNodeMask = 0 with nogil: err = cydriver.cuDeviceGetLuid(luid, &deviceNodeMask, cydev) if err != cydriver.CUDA_SUCCESS: @@ -25946,6 +25951,7 @@ def cuDeviceTotalMem(dev): :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetCount`, :py:obj:`~.cuDeviceGetName`, :py:obj:`~.cuDeviceGetUuid`, :py:obj:`~.cuDeviceGet`, :py:obj:`~.cuDeviceGetExecAffinitySupport`, :py:obj:`~.cudaMemGetInfo` """ cdef cydriver.CUdevice cydev + cdef size_t numbytes = 0 if dev is None: pdev = 0 elif isinstance(dev, (CUdevice,)): @@ -25953,7 +25959,6 @@ def cuDeviceTotalMem(dev): else: pdev = int(CUdevice(dev)) cydev = pdev - cdef size_t numbytes = 0 with nogil: err = cydriver.cuDeviceTotalMem(&numbytes, cydev) if err != cydriver.CUDA_SUCCESS: @@ -25993,6 +25998,9 @@ def cuDeviceGetTexture1DLinearMaxWidth(pformat not None : CUarray_format, unsign :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetCount`, :py:obj:`~.cuDeviceGetName`, :py:obj:`~.cuDeviceGetUuid`, :py:obj:`~.cuDeviceGet`, :py:obj:`~.cudaMemGetInfo`, :py:obj:`~.cuDeviceTotalMem` """ cdef cydriver.CUdevice cydev + cdef cydriver.CUarray_format cypformat + cdef size_t maxWidthInElements = 0 + cypformat = int(pformat) if dev is None: pdev = 0 elif isinstance(dev, (CUdevice,)): @@ -26000,8 +26008,6 @@ def cuDeviceGetTexture1DLinearMaxWidth(pformat not None : CUarray_format, unsign else: pdev = int(CUdevice(dev)) cydev = pdev - cdef size_t maxWidthInElements = 0 - cdef cydriver.CUarray_format cypformat = int(pformat) with nogil: err = cydriver.cuDeviceGetTexture1DLinearMaxWidth(&maxWidthInElements, cypformat, numChannels, cydev) if err != cydriver.CUDA_SUCCESS: @@ -26037,6 +26043,9 @@ def cuDeviceGetAttribute(attrib not None : CUdevice_attribute, dev): :py:obj:`~.cuDeviceGetCount`, :py:obj:`~.cuDeviceGetName`, :py:obj:`~.cuDeviceGetUuid`, :py:obj:`~.cuDeviceGet`, :py:obj:`~.cuDeviceTotalMem`, :py:obj:`~.cuDeviceGetExecAffinitySupport`, :py:obj:`~.cudaDeviceGetAttribute`, :py:obj:`~.cudaGetDeviceProperties` """ cdef cydriver.CUdevice cydev + cdef cydriver.CUdevice_attribute cyattrib + cdef int pi = 0 + cyattrib = int(attrib) if dev is None: pdev = 0 elif isinstance(dev, (CUdevice,)): @@ -26044,8 +26053,6 @@ def cuDeviceGetAttribute(attrib not None : CUdevice_attribute, dev): else: pdev = int(CUdevice(dev)) cydev = pdev - cdef int pi = 0 - cdef cydriver.CUdevice_attribute cyattrib = int(attrib) with nogil: err = cydriver.cuDeviceGetAttribute(&pi, cyattrib, cydev) if err != cydriver.CUDA_SUCCESS: @@ -26095,30 +26102,33 @@ def cuDeviceGetHostAtomicCapabilities(operations : Optional[tuple[CUatomicOperat :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetP2PAtomicCapabilities`, :py:obj:`~.cudaDeviceGeHostAtomicCapabilities` """ cdef cydriver.CUdevice cydev - if dev is None: - pdev = 0 - elif isinstance(dev, (CUdevice,)): - pdev = int(dev) - else: - pdev = int(CUdevice(dev)) - cydev = pdev - operations = [] if operations is None else operations - if not all(isinstance(_x, (CUatomicOperation)) for _x in operations): - raise TypeError("Argument 'operations' is not instance of type (expected tuple[cydriver.CUatomicOperation] or list[cydriver.CUatomicOperation]") + cdef vector[cydriver.CUatomicOperation] cyoperations cdef unsigned int* cycapabilities = NULL pycapabilities = [] - if count != 0: - cycapabilities = calloc(count, sizeof(unsigned int)) - if cycapabilities is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(count) + 'x' + str(sizeof(unsigned int))) - cdef vector[cydriver.CUatomicOperation] cyoperations = [int(pyoperations) for pyoperations in (operations)] - if count > len(operations): raise RuntimeError("List is too small: " + str(len(operations)) + " < " + str(count)) - with nogil: - err = cydriver.cuDeviceGetHostAtomicCapabilities(cycapabilities, cyoperations.data(), count, cydev) - if CUresult(err) == CUresult(0): - pycapabilities = [cycapabilities[idx] for idx in range(count)] - if cycapabilities is not NULL: - free(cycapabilities) + try: + if count != 0: + cycapabilities = calloc(count, sizeof(unsigned int)) + if cycapabilities is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(count) + 'x' + str(sizeof(unsigned int))) + operations = [] if operations is None else operations + if not all(isinstance(_x, (CUatomicOperation)) for _x in operations): + raise TypeError("Argument 'operations' is not instance of type (expected tuple[cydriver.CUatomicOperation] or list[cydriver.CUatomicOperation]") + cyoperations = operations + if count > len(operations): raise RuntimeError("List is too small: " + str(len(operations)) + " < " + str(count)) + if dev is None: + pdev = 0 + elif isinstance(dev, (CUdevice,)): + pdev = int(dev) + else: + pdev = int(CUdevice(dev)) + cydev = pdev + with nogil: + err = cydriver.cuDeviceGetHostAtomicCapabilities(cycapabilities, cyoperations.data(), count, cydev) + finally: + if CUresult(err) == CUresult(0): + pycapabilities = [cycapabilities[idx] for idx in range(count)] + if cycapabilities is not NULL: + free(cycapabilities) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, pycapabilities) @@ -26208,18 +26218,21 @@ def cuDeviceGetNvSciSyncAttributes(nvSciSyncAttrList, dev, int flags): :py:obj:`~.cuImportExternalSemaphore`, :py:obj:`~.cuDestroyExternalSemaphore`, :py:obj:`~.cuSignalExternalSemaphoresAsync`, :py:obj:`~.cuWaitExternalSemaphoresAsync` """ cdef cydriver.CUdevice cydev - if dev is None: - pdev = 0 - elif isinstance(dev, (CUdevice,)): - pdev = int(dev) - else: - pdev = int(CUdevice(dev)) - cydev = pdev cdef _HelperInputVoidPtrStruct cynvSciSyncAttrListHelper - cdef void* cynvSciSyncAttrList = _helper_input_void_ptr(nvSciSyncAttrList, &cynvSciSyncAttrListHelper) - with nogil: - err = cydriver.cuDeviceGetNvSciSyncAttributes(cynvSciSyncAttrList, cydev, flags) - _helper_input_void_ptr_free(&cynvSciSyncAttrListHelper) + cdef void* cynvSciSyncAttrList + try: + cynvSciSyncAttrList = _helper_input_void_ptr(nvSciSyncAttrList, &cynvSciSyncAttrListHelper) + if dev is None: + pdev = 0 + elif isinstance(dev, (CUdevice,)): + pdev = int(dev) + else: + pdev = int(CUdevice(dev)) + cydev = pdev + with nogil: + err = cydriver.cuDeviceGetNvSciSyncAttributes(cynvSciSyncAttrList, cydev, flags) + finally: + _helper_input_void_ptr_free(&cynvSciSyncAttrListHelper) return (_CUresult(err),) {{endif}} @@ -26255,13 +26268,6 @@ def cuDeviceSetMemPool(dev, pool): Use :py:obj:`~.cuMemAllocFromPoolAsync` to specify asynchronous allocations from a device different than the one the stream runs on. """ cdef cydriver.CUmemoryPool cypool - if pool is None: - ppool = 0 - elif isinstance(pool, (CUmemoryPool,)): - ppool = int(pool) - else: - ppool = int(CUmemoryPool(pool)) - cypool = ppool cdef cydriver.CUdevice cydev if dev is None: pdev = 0 @@ -26270,6 +26276,13 @@ def cuDeviceSetMemPool(dev, pool): else: pdev = int(CUdevice(dev)) cydev = pdev + if pool is None: + ppool = 0 + elif isinstance(pool, (CUmemoryPool,)): + ppool = int(pool) + else: + ppool = int(CUmemoryPool(pool)) + cypool = ppool with nogil: err = cydriver.cuDeviceSetMemPool(cydev, cypool) return (_CUresult(err),) @@ -26304,6 +26317,8 @@ def cuDeviceGetMemPool(dev): :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cuMemPoolCreate`, :py:obj:`~.cuDeviceSetMemPool` """ cdef cydriver.CUdevice cydev + cdef CUmemoryPool pool + pool = CUmemoryPool() if dev is None: pdev = 0 elif isinstance(dev, (CUdevice,)): @@ -26311,7 +26326,6 @@ def cuDeviceGetMemPool(dev): else: pdev = int(CUdevice(dev)) cydev = pdev - cdef CUmemoryPool pool = CUmemoryPool() with nogil: err = cydriver.cuDeviceGetMemPool(pool._pvt_ptr, cydev) if err != cydriver.CUDA_SUCCESS: @@ -26345,6 +26359,8 @@ def cuDeviceGetDefaultMemPool(dev): :py:obj:`~.cuMemAllocAsync`, :py:obj:`~.cuMemPoolTrimTo`, :py:obj:`~.cuMemPoolGetAttribute`, :py:obj:`~.cuMemPoolSetAttribute`, :py:obj:`~.cuMemPoolSetAccess`, :py:obj:`~.cuDeviceGetMemPool`, :py:obj:`~.cuMemPoolCreate` """ cdef cydriver.CUdevice cydev + cdef CUmemoryPool pool_out + pool_out = CUmemoryPool() if dev is None: pdev = 0 elif isinstance(dev, (CUdevice,)): @@ -26352,7 +26368,6 @@ def cuDeviceGetDefaultMemPool(dev): else: pdev = int(CUdevice(dev)) cydev = pdev - cdef CUmemoryPool pool_out = CUmemoryPool() with nogil: err = cydriver.cuDeviceGetDefaultMemPool(pool_out._pvt_ptr, cydev) if err != cydriver.CUDA_SUCCESS: @@ -26392,6 +26407,9 @@ def cuDeviceGetExecAffinitySupport(typename not None : CUexecAffinityType, dev): :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetCount`, :py:obj:`~.cuDeviceGetName`, :py:obj:`~.cuDeviceGetUuid`, :py:obj:`~.cuDeviceGet`, :py:obj:`~.cuDeviceTotalMem` """ cdef cydriver.CUdevice cydev + cdef cydriver.CUexecAffinityType cytypename + cdef int pi = 0 + cytypename = int(typename) if dev is None: pdev = 0 elif isinstance(dev, (CUdevice,)): @@ -26399,8 +26417,6 @@ def cuDeviceGetExecAffinitySupport(typename not None : CUexecAffinityType, dev): else: pdev = int(CUdevice(dev)) cydev = pdev - cdef int pi = 0 - cdef cydriver.CUexecAffinityType cytypename = int(typename) with nogil: err = cydriver.cuDeviceGetExecAffinitySupport(&pi, cytypename, cydev) if err != cydriver.CUDA_SUCCESS: @@ -26452,8 +26468,10 @@ def cuFlushGPUDirectRDMAWrites(target not None : CUflushGPUDirectRDMAWritesTarge CUresult :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE`, """ - cdef cydriver.CUflushGPUDirectRDMAWritesTarget cytarget = int(target) - cdef cydriver.CUflushGPUDirectRDMAWritesScope cyscope = int(scope) + cdef cydriver.CUflushGPUDirectRDMAWritesScope cyscope + cdef cydriver.CUflushGPUDirectRDMAWritesTarget cytarget + cytarget = int(target) + cyscope = int(scope) with nogil: err = cydriver.cuFlushGPUDirectRDMAWrites(cytarget, cyscope) return (_CUresult(err),) @@ -26524,6 +26542,8 @@ def cuDeviceGetProperties(dev): :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetCount`, :py:obj:`~.cuDeviceGetName`, :py:obj:`~.cuDeviceGetUuid`, :py:obj:`~.cuDeviceGet`, :py:obj:`~.cuDeviceTotalMem` """ cdef cydriver.CUdevice cydev + cdef CUdevprop prop + prop = CUdevprop() if dev is None: pdev = 0 elif isinstance(dev, (CUdevice,)): @@ -26531,7 +26551,6 @@ def cuDeviceGetProperties(dev): else: pdev = int(CUdevice(dev)) cydev = pdev - cdef CUdevprop prop = CUdevprop() with nogil: err = cydriver.cuDeviceGetProperties(prop._pvt_ptr, cydev) if err != cydriver.CUDA_SUCCESS: @@ -26572,6 +26591,8 @@ def cuDeviceComputeCapability(dev): :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetCount`, :py:obj:`~.cuDeviceGetName`, :py:obj:`~.cuDeviceGetUuid`, :py:obj:`~.cuDeviceGet`, :py:obj:`~.cuDeviceTotalMem` """ cdef cydriver.CUdevice cydev + cdef int minor = 0 + cdef int major = 0 if dev is None: pdev = 0 elif isinstance(dev, (CUdevice,)): @@ -26579,8 +26600,6 @@ def cuDeviceComputeCapability(dev): else: pdev = int(CUdevice(dev)) cydev = pdev - cdef int major = 0 - cdef int minor = 0 with nogil: err = cydriver.cuDeviceComputeCapability(&major, &minor, cydev) if err != cydriver.CUDA_SUCCESS: @@ -26631,6 +26650,8 @@ def cuDevicePrimaryCtxRetain(dev): :py:obj:`~.cuDevicePrimaryCtxRelease`, :py:obj:`~.cuDevicePrimaryCtxSetFlags`, :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize` """ cdef cydriver.CUdevice cydev + cdef CUcontext pctx + pctx = CUcontext() if dev is None: pdev = 0 elif isinstance(dev, (CUdevice,)): @@ -26638,7 +26659,6 @@ def cuDevicePrimaryCtxRetain(dev): else: pdev = int(CUdevice(dev)) cydev = pdev - cdef CUcontext pctx = CUcontext() with nogil: err = cydriver.cuDevicePrimaryCtxRetain(pctx._pvt_ptr, cydev) if err != cydriver.CUDA_SUCCESS: @@ -26834,6 +26854,8 @@ def cuDevicePrimaryCtxGetState(dev): -------- :py:obj:`~.cuDevicePrimaryCtxSetFlags`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxSetFlags`, :py:obj:`~.cudaGetDeviceFlags` """ + cdef int active = 0 + cdef unsigned int flags = 0 cdef cydriver.CUdevice cydev if dev is None: pdev = 0 @@ -26842,8 +26864,6 @@ def cuDevicePrimaryCtxGetState(dev): else: pdev = int(CUdevice(dev)) cydev = pdev - cdef unsigned int flags = 0 - cdef int active = 0 with nogil: err = cydriver.cuDevicePrimaryCtxGetState(cydev, &flags, &active) if err != cydriver.CUDA_SUCCESS: @@ -27063,6 +27083,10 @@ def cuCtxCreate(ctxCreateParams : Optional[CUctxCreateParams], unsigned int flag :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCoredumpSetAttributeGlobal`, :py:obj:`~.cuCoredumpSetAttribute`, :py:obj:`~.cuCtxSynchronize` """ cdef cydriver.CUdevice cydev + cdef cydriver.CUctxCreateParams* cyctxCreateParams_ptr + cdef CUcontext pctx + pctx = CUcontext() + cyctxCreateParams_ptr = ctxCreateParams._pvt_ptr if ctxCreateParams is not None else NULL if dev is None: pdev = 0 elif isinstance(dev, (CUdevice,)): @@ -27070,8 +27094,6 @@ def cuCtxCreate(ctxCreateParams : Optional[CUctxCreateParams], unsigned int flag else: pdev = int(CUdevice(dev)) cydev = pdev - cdef CUcontext pctx = CUcontext() - cdef cydriver.CUctxCreateParams* cyctxCreateParams_ptr = ctxCreateParams._pvt_ptr if ctxCreateParams is not None else NULL with nogil: err = cydriver.cuCtxCreate(pctx._pvt_ptr, cyctxCreateParams_ptr, flags, cydev) if err != cydriver.CUDA_SUCCESS: @@ -27208,7 +27230,8 @@ def cuCtxPopCurrent(): -------- :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize` """ - cdef CUcontext pctx = CUcontext() + cdef CUcontext pctx + pctx = CUcontext() with nogil: err = cydriver.cuCtxPopCurrent(pctx._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -27280,7 +27303,8 @@ def cuCtxGetCurrent(): -------- :py:obj:`~.cuCtxSetCurrent`, :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cudaGetDevice` """ - cdef CUcontext pctx = CUcontext() + cdef CUcontext pctx + pctx = CUcontext() with nogil: err = cydriver.cuCtxGetCurrent(pctx._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -27307,7 +27331,8 @@ def cuCtxGetDevice(): -------- :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`, :py:obj:`~.cudaGetDevice` """ - cdef CUdevice device = CUdevice() + cdef CUdevice device + device = CUdevice() with nogil: err = cydriver.cuCtxGetDevice(device._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -27342,6 +27367,8 @@ def cuCtxGetDevice_v2(ctx): :py:obj:`~.cuCtxGetCurrent`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent` """ cdef cydriver.CUcontext cyctx + cdef CUdevice device + device = CUdevice() if ctx is None: pctx = 0 elif isinstance(ctx, (CUcontext,)): @@ -27349,7 +27376,6 @@ def cuCtxGetDevice_v2(ctx): else: pctx = int(CUcontext(ctx)) cyctx = pctx - cdef CUdevice device = CUdevice() with nogil: err = cydriver.cuCtxGetDevice_v2(device._pvt_ptr, cyctx) if err != cydriver.CUDA_SUCCESS: @@ -27440,6 +27466,7 @@ def cuCtxGetId(ctx): -------- :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPushCurrent` """ + cdef unsigned long long ctxId = 0 cdef cydriver.CUcontext cyctx if ctx is None: pctx = 0 @@ -27448,7 +27475,6 @@ def cuCtxGetId(ctx): else: pctx = int(CUcontext(ctx)) cyctx = pctx - cdef unsigned long long ctxId = 0 with nogil: err = cydriver.cuCtxGetId(cyctx, &ctxId) if err != cydriver.CUDA_SUCCESS: @@ -27630,7 +27656,8 @@ def cuCtxSetLimit(limit not None : CUlimit, size_t value): -------- :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSynchronize`, :py:obj:`~.cudaDeviceSetLimit` """ - cdef cydriver.CUlimit cylimit = int(limit) + cdef cydriver.CUlimit cylimit + cylimit = int(limit) with nogil: err = cydriver.cuCtxSetLimit(cylimit, value) return (_CUresult(err),) @@ -27686,8 +27713,9 @@ def cuCtxGetLimit(limit not None : CUlimit): -------- :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`, :py:obj:`~.cudaDeviceGetLimit` """ + cdef cydriver.CUlimit cylimit cdef size_t pvalue = 0 - cdef cydriver.CUlimit cylimit = int(limit) + cylimit = int(limit) with nogil: err = cydriver.cuCtxGetLimit(&pvalue, cylimit) if err != cydriver.CUDA_SUCCESS: @@ -27795,7 +27823,8 @@ def cuCtxSetCacheConfig(config not None : CUfunc_cache): -------- :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cudaDeviceSetCacheConfig`, :py:obj:`~.cuKernelSetCacheConfig` """ - cdef cydriver.CUfunc_cache cyconfig = int(config) + cdef cydriver.CUfunc_cache cyconfig + cyconfig = int(config) with nogil: err = cydriver.cuCtxSetCacheConfig(cyconfig) return (_CUresult(err),) @@ -27833,6 +27862,7 @@ def cuCtxGetApiVersion(ctx): -------- :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize` """ + cdef unsigned int version = 0 cdef cydriver.CUcontext cyctx if ctx is None: pctx = 0 @@ -27841,7 +27871,6 @@ def cuCtxGetApiVersion(ctx): else: pctx = int(CUcontext(ctx)) cyctx = pctx - cdef unsigned int version = 0 with nogil: err = cydriver.cuCtxGetApiVersion(cyctx, &version) if err != cydriver.CUDA_SUCCESS: @@ -27886,8 +27915,8 @@ def cuCtxGetStreamPriorityRange(): -------- :py:obj:`~.cuStreamCreateWithPriority`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`, :py:obj:`~.cudaDeviceGetStreamPriorityRange` """ - cdef int leastPriority = 0 cdef int greatestPriority = 0 + cdef int leastPriority = 0 with nogil: err = cydriver.cuCtxGetStreamPriorityRange(&leastPriority, &greatestPriority) if err != cydriver.CUDA_SUCCESS: @@ -27946,8 +27975,10 @@ def cuCtxGetExecAffinity(typename not None : CUexecAffinityType): -------- :py:obj:`~.CUexecAffinityParam` """ - cdef CUexecAffinityParam pExecAffinity = CUexecAffinityParam() - cdef cydriver.CUexecAffinityType cytypename = int(typename) + cdef cydriver.CUexecAffinityType cytypename + cdef CUexecAffinityParam pExecAffinity + pExecAffinity = CUexecAffinityParam() + cytypename = int(typename) with nogil: err = cydriver.cuCtxGetExecAffinity(pExecAffinity._pvt_ptr, cytypename) if err != cydriver.CUDA_SUCCESS: @@ -27995,13 +28026,6 @@ def cuCtxRecordEvent(hCtx, hEvent): The API will return :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED` if the specified context `hCtx` has a stream in the capture mode. In such a case, the call will invalidate all the conflicting captures. """ cdef cydriver.CUevent cyhEvent - if hEvent is None: - phEvent = 0 - elif isinstance(hEvent, (CUevent,)): - phEvent = int(hEvent) - else: - phEvent = int(CUevent(hEvent)) - cyhEvent = phEvent cdef cydriver.CUcontext cyhCtx if hCtx is None: phCtx = 0 @@ -28010,6 +28034,13 @@ def cuCtxRecordEvent(hCtx, hEvent): else: phCtx = int(CUcontext(hCtx)) cyhCtx = phCtx + if hEvent is None: + phEvent = 0 + elif isinstance(hEvent, (CUevent,)): + phEvent = int(hEvent) + else: + phEvent = int(CUevent(hEvent)) + cyhEvent = phEvent with nogil: err = cydriver.cuCtxRecordEvent(cyhCtx, cyhEvent) return (_CUresult(err),) @@ -28054,13 +28085,6 @@ def cuCtxWaitEvent(hCtx, hEvent): The API will return :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED` and invalidate the capture if the specified event `hEvent` is part of an ongoing capture sequence or if the specified context `hCtx` has a stream in the capture mode. """ cdef cydriver.CUevent cyhEvent - if hEvent is None: - phEvent = 0 - elif isinstance(hEvent, (CUevent,)): - phEvent = int(hEvent) - else: - phEvent = int(CUevent(hEvent)) - cyhEvent = phEvent cdef cydriver.CUcontext cyhCtx if hCtx is None: phCtx = 0 @@ -28069,6 +28093,13 @@ def cuCtxWaitEvent(hCtx, hEvent): else: phCtx = int(CUcontext(hCtx)) cyhCtx = phCtx + if hEvent is None: + phEvent = 0 + elif isinstance(hEvent, (CUevent,)): + phEvent = int(hEvent) + else: + phEvent = int(CUevent(hEvent)) + cyhEvent = phEvent with nogil: err = cydriver.cuCtxWaitEvent(cyhCtx, cyhEvent) return (_CUresult(err),) @@ -28107,7 +28138,8 @@ def cuCtxAttach(unsigned int flags): -------- :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxDetach`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize` """ - cdef CUcontext pctx = CUcontext() + cdef CUcontext pctx + pctx = CUcontext() with nogil: err = cydriver.cuCtxAttach(pctx._pvt_ptr, flags) if err != cydriver.CUDA_SUCCESS: @@ -28249,7 +28281,8 @@ def cuCtxSetSharedMemConfig(config not None : CUsharedconfig): -------- :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuCtxDestroy`, :py:obj:`~.cuCtxGetApiVersion`, :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxGetDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuCtxGetLimit`, :py:obj:`~.cuCtxPopCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuCtxSetLimit`, :py:obj:`~.cuCtxSynchronize`, :py:obj:`~.cuCtxGetSharedMemConfig`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cudaDeviceSetSharedMemConfig` """ - cdef cydriver.CUsharedconfig cyconfig = int(config) + cdef cydriver.CUsharedconfig cyconfig + cyconfig = int(config) with nogil: err = cydriver.cuCtxSetSharedMemConfig(cyconfig) return (_CUresult(err),) @@ -28286,7 +28319,8 @@ def cuModuleLoad(char* fname): -------- :py:obj:`~.cuModuleGetFunction`, :py:obj:`~.cuModuleGetGlobal`, :py:obj:`~.cuModuleGetTexRef`, :py:obj:`~.cuModuleLoadData`, :py:obj:`~.cuModuleLoadDataEx`, :py:obj:`~.cuModuleLoadFatBinary`, :py:obj:`~.cuModuleUnload` """ - cdef CUmodule module = CUmodule() + cdef CUmodule module + module = CUmodule() with nogil: err = cydriver.cuModuleLoad(module._pvt_ptr, fname) if err != cydriver.CUDA_SUCCESS: @@ -28321,12 +28355,16 @@ def cuModuleLoadData(image): -------- :py:obj:`~.cuModuleGetFunction`, :py:obj:`~.cuModuleGetGlobal`, :py:obj:`~.cuModuleGetTexRef`, :py:obj:`~.cuModuleLoad`, :py:obj:`~.cuModuleLoadDataEx`, :py:obj:`~.cuModuleLoadFatBinary`, :py:obj:`~.cuModuleUnload` """ - cdef CUmodule module = CUmodule() cdef _HelperInputVoidPtrStruct cyimageHelper - cdef void* cyimage = _helper_input_void_ptr(image, &cyimageHelper) - with nogil: - err = cydriver.cuModuleLoadData(module._pvt_ptr, cyimage) - _helper_input_void_ptr_free(&cyimageHelper) + cdef void* cyimage + cdef CUmodule module + try: + module = CUmodule() + cyimage = _helper_input_void_ptr(image, &cyimageHelper) + with nogil: + err = cydriver.cuModuleLoadData(module._pvt_ptr, cyimage) + finally: + _helper_input_void_ptr_free(&cyimageHelper) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, module) @@ -28365,22 +28403,29 @@ def cuModuleLoadDataEx(image, unsigned int numOptions, options : Optional[tuple[ -------- :py:obj:`~.cuModuleGetFunction`, :py:obj:`~.cuModuleGetGlobal`, :py:obj:`~.cuModuleGetTexRef`, :py:obj:`~.cuModuleLoad`, :py:obj:`~.cuModuleLoadData`, :py:obj:`~.cuModuleLoadFatBinary`, :py:obj:`~.cuModuleUnload` """ - optionValues = [] if optionValues is None else optionValues - options = [] if options is None else options - if not all(isinstance(_x, (CUjit_option)) for _x in options): - raise TypeError("Argument 'options' is not instance of type (expected tuple[cydriver.CUjit_option] or list[cydriver.CUjit_option]") - cdef CUmodule module = CUmodule() + cdef _InputVoidPtrPtrHelper voidStarHelperoptionValues + cdef void ** cyoptionValues_ptr + cdef vector[cydriver.CUjit_option] cyoptions cdef _HelperInputVoidPtrStruct cyimageHelper - cdef void* cyimage = _helper_input_void_ptr(image, &cyimageHelper) - if numOptions > len(options): raise RuntimeError("List is too small: " + str(len(options)) + " < " + str(numOptions)) - if numOptions > len(optionValues): raise RuntimeError("List is too small: " + str(len(optionValues)) + " < " + str(numOptions)) - cdef vector[cydriver.CUjit_option] cyoptions = [int(pyoptions) for pyoptions in (options)] - pylist = [_HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(options, optionValues)] - cdef _InputVoidPtrPtrHelper voidStarHelperoptionValues = _InputVoidPtrPtrHelper(pylist) - cdef void** cyoptionValues_ptr = voidStarHelperoptionValues.cptr - with nogil: - err = cydriver.cuModuleLoadDataEx(module._pvt_ptr, cyimage, numOptions, cyoptions.data(), cyoptionValues_ptr) - _helper_input_void_ptr_free(&cyimageHelper) + cdef void* cyimage + cdef CUmodule module + try: + module = CUmodule() + cyimage = _helper_input_void_ptr(image, &cyimageHelper) + if numOptions > len(options): raise RuntimeError("List is too small: " + str(len(options)) + " < " + str(numOptions)) + if numOptions > len(optionValues): raise RuntimeError("List is too small: " + str(len(optionValues)) + " < " + str(numOptions)) + options = [] if options is None else options + if not all(isinstance(_x, (CUjit_option)) for _x in options): + raise TypeError("Argument 'options' is not instance of type (expected tuple[cydriver.CUjit_option] or list[cydriver.CUjit_option]") + cyoptions = options + optionValues = [] if optionValues is None else optionValues + pylist = [_HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(options, optionValues)] + voidStarHelperoptionValues = _InputVoidPtrPtrHelper(pylist) + cyoptionValues_ptr = voidStarHelperoptionValues.cptr + with nogil: + err = cydriver.cuModuleLoadDataEx(module._pvt_ptr, cyimage, numOptions, cyoptions.data(), cyoptionValues_ptr) + finally: + _helper_input_void_ptr_free(&cyimageHelper) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, module) @@ -28419,12 +28464,16 @@ def cuModuleLoadFatBinary(fatCubin): -------- :py:obj:`~.cuModuleGetFunction`, :py:obj:`~.cuModuleGetGlobal`, :py:obj:`~.cuModuleGetTexRef`, :py:obj:`~.cuModuleLoad`, :py:obj:`~.cuModuleLoadData`, :py:obj:`~.cuModuleLoadDataEx`, :py:obj:`~.cuModuleUnload` """ - cdef CUmodule module = CUmodule() cdef _HelperInputVoidPtrStruct cyfatCubinHelper - cdef void* cyfatCubin = _helper_input_void_ptr(fatCubin, &cyfatCubinHelper) - with nogil: - err = cydriver.cuModuleLoadFatBinary(module._pvt_ptr, cyfatCubin) - _helper_input_void_ptr_free(&cyfatCubinHelper) + cdef void* cyfatCubin + cdef CUmodule module + try: + module = CUmodule() + cyfatCubin = _helper_input_void_ptr(fatCubin, &cyfatCubinHelper) + with nogil: + err = cydriver.cuModuleLoadFatBinary(module._pvt_ptr, cyfatCubin) + finally: + _helper_input_void_ptr_free(&cyfatCubinHelper) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, module) @@ -28526,6 +28575,8 @@ def cuModuleGetFunction(hmod, char* name): :py:obj:`~.cuModuleGetGlobal`, :py:obj:`~.cuModuleGetTexRef`, :py:obj:`~.cuModuleLoad`, :py:obj:`~.cuModuleLoadData`, :py:obj:`~.cuModuleLoadDataEx`, :py:obj:`~.cuModuleLoadFatBinary`, :py:obj:`~.cuModuleUnload` """ cdef cydriver.CUmodule cyhmod + cdef CUfunction hfunc + hfunc = CUfunction() if hmod is None: phmod = 0 elif isinstance(hmod, (CUmodule,)): @@ -28533,7 +28584,6 @@ def cuModuleGetFunction(hmod, char* name): else: phmod = int(CUmodule(hmod)) cyhmod = phmod - cdef CUfunction hfunc = CUfunction() with nogil: err = cydriver.cuModuleGetFunction(hfunc._pvt_ptr, cyhmod, name) if err != cydriver.CUDA_SUCCESS: @@ -28562,6 +28612,7 @@ def cuModuleGetFunctionCount(mod): Number of functions found within the module """ cdef cydriver.CUmodule cymod + cdef unsigned int count = 0 if mod is None: pmod = 0 elif isinstance(mod, (CUmodule,)): @@ -28569,7 +28620,6 @@ def cuModuleGetFunctionCount(mod): else: pmod = int(CUmodule(mod)) cymod = pmod - cdef unsigned int count = 0 with nogil: err = cydriver.cuModuleGetFunctionCount(&count, cymod) if err != cydriver.CUDA_SUCCESS: @@ -28612,25 +28662,27 @@ def cuModuleEnumerateFunctions(unsigned int numFunctions, mod): :py:obj:`~.cuModuleGetFunction`, :py:obj:`~.cuModuleGetFunctionCount`, :py:obj:`~.cuFuncIsLoaded`, :py:obj:`~.cuFuncLoad` """ cdef cydriver.CUmodule cymod - if mod is None: - pmod = 0 - elif isinstance(mod, (CUmodule,)): - pmod = int(mod) - else: - pmod = int(CUmodule(mod)) - cymod = pmod cdef cydriver.CUfunction* cyfunctions = NULL pyfunctions = [] - if numFunctions != 0: - cyfunctions = calloc(numFunctions, sizeof(cydriver.CUfunction)) - if cyfunctions is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(numFunctions) + 'x' + str(sizeof(cydriver.CUfunction))) - with nogil: - err = cydriver.cuModuleEnumerateFunctions(cyfunctions, numFunctions, cymod) - if CUresult(err) == CUresult(0): - pyfunctions = [CUfunction(init_value=cyfunctions[idx]) for idx in range(numFunctions)] - if cyfunctions is not NULL: - free(cyfunctions) + try: + if numFunctions != 0: + cyfunctions = calloc(numFunctions, sizeof(cydriver.CUfunction)) + if cyfunctions is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(numFunctions) + 'x' + str(sizeof(cydriver.CUfunction))) + if mod is None: + pmod = 0 + elif isinstance(mod, (CUmodule,)): + pmod = int(mod) + else: + pmod = int(CUmodule(mod)) + cymod = pmod + with nogil: + err = cydriver.cuModuleEnumerateFunctions(cyfunctions, numFunctions, cymod) + finally: + if CUresult(err) == CUresult(0): + pyfunctions = [CUfunction(init_value=cyfunctions[idx]) for idx in range(numFunctions)] + if cyfunctions is not NULL: + free(cyfunctions) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, pyfunctions) @@ -28669,6 +28721,9 @@ def cuModuleGetGlobal(hmod, char* name): :py:obj:`~.cuModuleGetFunction`, :py:obj:`~.cuModuleGetTexRef`, :py:obj:`~.cuModuleLoad`, :py:obj:`~.cuModuleLoadData`, :py:obj:`~.cuModuleLoadDataEx`, :py:obj:`~.cuModuleLoadFatBinary`, :py:obj:`~.cuModuleUnload`, :py:obj:`~.cudaGetSymbolAddress`, :py:obj:`~.cudaGetSymbolSize` """ cdef cydriver.CUmodule cyhmod + cdef size_t numbytes = 0 + cdef CUdeviceptr dptr + dptr = CUdeviceptr() if hmod is None: phmod = 0 elif isinstance(hmod, (CUmodule,)): @@ -28676,8 +28731,6 @@ def cuModuleGetGlobal(hmod, char* name): else: phmod = int(CUmodule(hmod)) cyhmod = phmod - cdef CUdeviceptr dptr = CUdeviceptr() - cdef size_t numbytes = 0 with nogil: err = cydriver.cuModuleGetGlobal(dptr._pvt_ptr, &numbytes, cyhmod, name) if err != cydriver.CUDA_SUCCESS: @@ -28738,22 +28791,28 @@ def cuLinkCreate(unsigned int numOptions, options : Optional[tuple[CUjit_option] ----- For LTO-IR input, only LTO-IR compiled with toolkits prior to CUDA 12.0 will be accepted """ - optionValues = [] if optionValues is None else optionValues - options = [] if options is None else options - if not all(isinstance(_x, (CUjit_option)) for _x in options): - raise TypeError("Argument 'options' is not instance of type (expected tuple[cydriver.CUjit_option] or list[cydriver.CUjit_option]") - if numOptions > len(options): raise RuntimeError("List is too small: " + str(len(options)) + " < " + str(numOptions)) - if numOptions > len(optionValues): raise RuntimeError("List is too small: " + str(len(optionValues)) + " < " + str(numOptions)) - cdef vector[cydriver.CUjit_option] cyoptions = [int(pyoptions) for pyoptions in (options)] - pylist = [_HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(options, optionValues)] - cdef _InputVoidPtrPtrHelper voidStarHelperoptionValues = _InputVoidPtrPtrHelper(pylist) - cdef void** cyoptionValues_ptr = voidStarHelperoptionValues.cptr - cdef CUlinkState stateOut = CUlinkState() - with nogil: - err = cydriver.cuLinkCreate(numOptions, cyoptions.data(), cyoptionValues_ptr, stateOut._pvt_ptr) - stateOut._keepalive.append(voidStarHelperoptionValues) - for option in pylist: - stateOut._keepalive.append(option) + cdef CUlinkState stateOut + cdef _InputVoidPtrPtrHelper voidStarHelperoptionValues + cdef void ** cyoptionValues_ptr + cdef vector[cydriver.CUjit_option] cyoptions + try: + if numOptions > len(options): raise RuntimeError("List is too small: " + str(len(options)) + " < " + str(numOptions)) + if numOptions > len(optionValues): raise RuntimeError("List is too small: " + str(len(optionValues)) + " < " + str(numOptions)) + options = [] if options is None else options + if not all(isinstance(_x, (CUjit_option)) for _x in options): + raise TypeError("Argument 'options' is not instance of type (expected tuple[cydriver.CUjit_option] or list[cydriver.CUjit_option]") + cyoptions = options + optionValues = [] if optionValues is None else optionValues + pylist = [_HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(options, optionValues)] + voidStarHelperoptionValues = _InputVoidPtrPtrHelper(pylist) + cyoptionValues_ptr = voidStarHelperoptionValues.cptr + stateOut = CUlinkState() + with nogil: + err = cydriver.cuLinkCreate(numOptions, cyoptions.data(), cyoptionValues_ptr, stateOut._pvt_ptr) + finally: + stateOut._keepalive.append(voidStarHelperoptionValues) + for option in pylist: + stateOut._keepalive.append(option) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, stateOut) @@ -28807,30 +28866,37 @@ def cuLinkAddData(state, typename not None : CUjitInputType, data, size_t size, ----- For LTO-IR input, only LTO-IR compiled with toolkits prior to CUDA 12.0 will be accepted """ - optionValues = [] if optionValues is None else optionValues - options = [] if options is None else options - if not all(isinstance(_x, (CUjit_option)) for _x in options): - raise TypeError("Argument 'options' is not instance of type (expected tuple[cydriver.CUjit_option] or list[cydriver.CUjit_option]") - cdef cydriver.CUlinkState cystate - if state is None: - pstate = 0 - elif isinstance(state, (CUlinkState,)): - pstate = int(state) - else: - pstate = int(CUlinkState(state)) - cystate = pstate - cdef cydriver.CUjitInputType cytypename = int(typename) + cdef _InputVoidPtrPtrHelper voidStarHelperoptionValues + cdef void ** cyoptionValues_ptr + cdef vector[cydriver.CUjit_option] cyoptions cdef _HelperInputVoidPtrStruct cydataHelper - cdef void* cydata = _helper_input_void_ptr(data, &cydataHelper) - if numOptions > len(options): raise RuntimeError("List is too small: " + str(len(options)) + " < " + str(numOptions)) - if numOptions > len(optionValues): raise RuntimeError("List is too small: " + str(len(optionValues)) + " < " + str(numOptions)) - cdef vector[cydriver.CUjit_option] cyoptions = [int(pyoptions) for pyoptions in (options)] - pylist = [_HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(options, optionValues)] - cdef _InputVoidPtrPtrHelper voidStarHelperoptionValues = _InputVoidPtrPtrHelper(pylist) - cdef void** cyoptionValues_ptr = voidStarHelperoptionValues.cptr - with nogil: - err = cydriver.cuLinkAddData(cystate, cytypename, cydata, size, name, numOptions, cyoptions.data(), cyoptionValues_ptr) - _helper_input_void_ptr_free(&cydataHelper) + cdef void* cydata + cdef cydriver.CUjitInputType cytypename + cdef cydriver.CUlinkState cystate + try: + if state is None: + pstate = 0 + elif isinstance(state, (CUlinkState,)): + pstate = int(state) + else: + pstate = int(CUlinkState(state)) + cystate = pstate + cytypename = int(typename) + cydata = _helper_input_void_ptr(data, &cydataHelper) + if numOptions > len(options): raise RuntimeError("List is too small: " + str(len(options)) + " < " + str(numOptions)) + if numOptions > len(optionValues): raise RuntimeError("List is too small: " + str(len(optionValues)) + " < " + str(numOptions)) + options = [] if options is None else options + if not all(isinstance(_x, (CUjit_option)) for _x in options): + raise TypeError("Argument 'options' is not instance of type (expected tuple[cydriver.CUjit_option] or list[cydriver.CUjit_option]") + cyoptions = options + optionValues = [] if optionValues is None else optionValues + pylist = [_HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(options, optionValues)] + voidStarHelperoptionValues = _InputVoidPtrPtrHelper(pylist) + cyoptionValues_ptr = voidStarHelperoptionValues.cptr + with nogil: + err = cydriver.cuLinkAddData(cystate, cytypename, cydata, size, name, numOptions, cyoptions.data(), cyoptionValues_ptr) + finally: + _helper_input_void_ptr_free(&cydataHelper) return (_CUresult(err),) {{endif}} @@ -28880,10 +28946,10 @@ def cuLinkAddFile(state, typename not None : CUjitInputType, char* path, unsigne ----- For LTO-IR input, only LTO-IR compiled with toolkits prior to CUDA 12.0 will be accepted """ - optionValues = [] if optionValues is None else optionValues - options = [] if options is None else options - if not all(isinstance(_x, (CUjit_option)) for _x in options): - raise TypeError("Argument 'options' is not instance of type (expected tuple[cydriver.CUjit_option] or list[cydriver.CUjit_option]") + cdef _InputVoidPtrPtrHelper voidStarHelperoptionValues + cdef void ** cyoptionValues_ptr + cdef vector[cydriver.CUjit_option] cyoptions + cdef cydriver.CUjitInputType cytypename cdef cydriver.CUlinkState cystate if state is None: pstate = 0 @@ -28892,13 +28958,17 @@ def cuLinkAddFile(state, typename not None : CUjitInputType, char* path, unsigne else: pstate = int(CUlinkState(state)) cystate = pstate - cdef cydriver.CUjitInputType cytypename = int(typename) + cytypename = int(typename) if numOptions > len(options): raise RuntimeError("List is too small: " + str(len(options)) + " < " + str(numOptions)) if numOptions > len(optionValues): raise RuntimeError("List is too small: " + str(len(optionValues)) + " < " + str(numOptions)) - cdef vector[cydriver.CUjit_option] cyoptions = [int(pyoptions) for pyoptions in (options)] - pylist = [_HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(options, optionValues)] - cdef _InputVoidPtrPtrHelper voidStarHelperoptionValues = _InputVoidPtrPtrHelper(pylist) - cdef void** cyoptionValues_ptr = voidStarHelperoptionValues.cptr + options = [] if options is None else options + if not all(isinstance(_x, (CUjit_option)) for _x in options): + raise TypeError("Argument 'options' is not instance of type (expected tuple[cydriver.CUjit_option] or list[cydriver.CUjit_option]") + cyoptions = options + optionValues = [] if optionValues is None else optionValues + pylist = [_HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(options, optionValues)] + voidStarHelperoptionValues = _InputVoidPtrPtrHelper(pylist) + cyoptionValues_ptr = voidStarHelperoptionValues.cptr with nogil: err = cydriver.cuLinkAddFile(cystate, cytypename, path, numOptions, cyoptions.data(), cyoptionValues_ptr) return (_CUresult(err),) @@ -28934,6 +29004,8 @@ def cuLinkComplete(state): -------- :py:obj:`~.cuLinkCreate`, :py:obj:`~.cuLinkAddData`, :py:obj:`~.cuLinkAddFile`, :py:obj:`~.cuLinkDestroy`, :py:obj:`~.cuModuleLoadData` """ + cdef size_t sizeOut = 0 + cdef void_ptr cubinOut = 0 cdef cydriver.CUlinkState cystate if state is None: pstate = 0 @@ -28942,8 +29014,6 @@ def cuLinkComplete(state): else: pstate = int(CUlinkState(state)) cystate = pstate - cdef void_ptr cubinOut = 0 - cdef size_t sizeOut = 0 with nogil: err = cydriver.cuLinkComplete(cystate, &cubinOut, &sizeOut) if err != cydriver.CUDA_SUCCESS: @@ -29018,6 +29088,8 @@ def cuModuleGetTexRef(hmod, char* name): :py:obj:`~.cuModuleGetFunction`, :py:obj:`~.cuModuleGetGlobal`, :py:obj:`~.cuModuleGetSurfRef`, :py:obj:`~.cuModuleLoad`, :py:obj:`~.cuModuleLoadData`, :py:obj:`~.cuModuleLoadDataEx`, :py:obj:`~.cuModuleLoadFatBinary`, :py:obj:`~.cuModuleUnload` """ cdef cydriver.CUmodule cyhmod + cdef CUtexref pTexRef + pTexRef = CUtexref() if hmod is None: phmod = 0 elif isinstance(hmod, (CUmodule,)): @@ -29025,7 +29097,6 @@ def cuModuleGetTexRef(hmod, char* name): else: phmod = int(CUmodule(hmod)) cyhmod = phmod - cdef CUtexref pTexRef = CUtexref() with nogil: err = cydriver.cuModuleGetTexRef(pTexRef._pvt_ptr, cyhmod, name) if err != cydriver.CUDA_SUCCESS: @@ -29065,6 +29136,8 @@ def cuModuleGetSurfRef(hmod, char* name): :py:obj:`~.cuModuleGetFunction`, :py:obj:`~.cuModuleGetGlobal`, :py:obj:`~.cuModuleGetTexRef`, :py:obj:`~.cuModuleLoad`, :py:obj:`~.cuModuleLoadData`, :py:obj:`~.cuModuleLoadDataEx`, :py:obj:`~.cuModuleLoadFatBinary`, :py:obj:`~.cuModuleUnload` """ cdef cydriver.CUmodule cyhmod + cdef CUsurfref pSurfRef + pSurfRef = CUsurfref() if hmod is None: phmod = 0 elif isinstance(hmod, (CUmodule,)): @@ -29072,7 +29145,6 @@ def cuModuleGetSurfRef(hmod, char* name): else: phmod = int(CUmodule(hmod)) cyhmod = phmod - cdef CUsurfref pSurfRef = CUsurfref() with nogil: err = cydriver.cuModuleGetSurfRef(pSurfRef._pvt_ptr, cyhmod, name) if err != cydriver.CUDA_SUCCESS: @@ -29149,32 +29221,42 @@ def cuLibraryLoadData(code, jitOptions : Optional[tuple[CUjit_option] | list[CUj ----- If the library contains managed variables and no device in the system supports managed variables this call is expected to return :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED` """ - libraryOptionValues = [] if libraryOptionValues is None else libraryOptionValues - libraryOptions = [] if libraryOptions is None else libraryOptions - if not all(isinstance(_x, (CUlibraryOption)) for _x in libraryOptions): - raise TypeError("Argument 'libraryOptions' is not instance of type (expected tuple[cydriver.CUlibraryOption] or list[cydriver.CUlibraryOption]") - jitOptionsValues = [] if jitOptionsValues is None else jitOptionsValues - jitOptions = [] if jitOptions is None else jitOptions - if not all(isinstance(_x, (CUjit_option)) for _x in jitOptions): - raise TypeError("Argument 'jitOptions' is not instance of type (expected tuple[cydriver.CUjit_option] or list[cydriver.CUjit_option]") - cdef CUlibrary library = CUlibrary() + cdef _InputVoidPtrPtrHelper voidStarHelperlibraryOptionValues + cdef void** cylibraryOptionValues_ptr + cdef vector[cydriver.CUlibraryOption] cylibraryOptions + cdef _InputVoidPtrPtrHelper voidStarHelperjitOptionsValues + cdef void** cyjitOptionsValues_ptr + cdef vector[cydriver.CUjit_option] cyjitOptions cdef _HelperInputVoidPtrStruct cycodeHelper - cdef void* cycode = _helper_input_void_ptr(code, &cycodeHelper) - cdef vector[cydriver.CUjit_option] cyjitOptions = [int(pyjitOptions) for pyjitOptions in (jitOptions)] - pylist = [_HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(jitOptions, jitOptionsValues)] - cdef _InputVoidPtrPtrHelper voidStarHelperjitOptionsValues = _InputVoidPtrPtrHelper(pylist) - cdef void** cyjitOptionsValues_ptr = voidStarHelperjitOptionsValues.cptr - if numJitOptions > len(jitOptions): raise RuntimeError("List is too small: " + str(len(jitOptions)) + " < " + str(numJitOptions)) - if numJitOptions > len(jitOptionsValues): raise RuntimeError("List is too small: " + str(len(jitOptionsValues)) + " < " + str(numJitOptions)) - cdef vector[cydriver.CUlibraryOption] cylibraryOptions = [int(pylibraryOptions) for pylibraryOptions in (libraryOptions)] - pylist = [_HelperCUlibraryOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(libraryOptions, libraryOptionValues)] - cdef _InputVoidPtrPtrHelper voidStarHelperlibraryOptionValues = _InputVoidPtrPtrHelper(pylist) - cdef void** cylibraryOptionValues_ptr = voidStarHelperlibraryOptionValues.cptr - if numLibraryOptions > len(libraryOptions): raise RuntimeError("List is too small: " + str(len(libraryOptions)) + " < " + str(numLibraryOptions)) - if numLibraryOptions > len(libraryOptionValues): raise RuntimeError("List is too small: " + str(len(libraryOptionValues)) + " < " + str(numLibraryOptions)) - with nogil: - err = cydriver.cuLibraryLoadData(library._pvt_ptr, cycode, cyjitOptions.data(), cyjitOptionsValues_ptr, numJitOptions, cylibraryOptions.data(), cylibraryOptionValues_ptr, numLibraryOptions) - _helper_input_void_ptr_free(&cycodeHelper) + cdef void* cycode + cdef CUlibrary library + try: + library = CUlibrary() + cycode = _helper_input_void_ptr(code, &cycodeHelper) + jitOptions = [] if jitOptions is None else jitOptions + if not all(isinstance(_x, (CUjit_option)) for _x in jitOptions): + raise TypeError("Argument 'jitOptions' is not instance of type (expected tuple[cydriver.CUjit_option] or list[cydriver.CUjit_option]") + cyjitOptions = jitOptions + jitOptionsValues = [] if jitOptionsValues is None else jitOptionsValues + pylist = [_HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(jitOptions, jitOptionsValues)] + voidStarHelperjitOptionsValues = _InputVoidPtrPtrHelper(pylist) + cyjitOptionsValues_ptr = voidStarHelperjitOptionsValues.cptr + if numJitOptions > len(jitOptions): raise RuntimeError("List is too small: " + str(len(jitOptions)) + " < " + str(numJitOptions)) + if numJitOptions > len(jitOptionsValues): raise RuntimeError("List is too small: " + str(len(jitOptionsValues)) + " < " + str(numJitOptions)) + libraryOptions = [] if libraryOptions is None else libraryOptions + if not all(isinstance(_x, (CUlibraryOption)) for _x in libraryOptions): + raise TypeError("Argument 'libraryOptions' is not instance of type (expected tuple[cydriver.CUlibraryOption] or list[cydriver.CUlibraryOption]") + cylibraryOptions = libraryOptions + libraryOptionValues = [] if libraryOptionValues is None else libraryOptionValues + pylist = [_HelperCUlibraryOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(libraryOptions, libraryOptionValues)] + voidStarHelperlibraryOptionValues = _InputVoidPtrPtrHelper(pylist) + cylibraryOptionValues_ptr = voidStarHelperlibraryOptionValues.cptr + if numLibraryOptions > len(libraryOptions): raise RuntimeError("List is too small: " + str(len(libraryOptions)) + " < " + str(numLibraryOptions)) + if numLibraryOptions > len(libraryOptionValues): raise RuntimeError("List is too small: " + str(len(libraryOptionValues)) + " < " + str(numLibraryOptions)) + with nogil: + err = cydriver.cuLibraryLoadData(library._pvt_ptr, cycode, cyjitOptions.data(), cyjitOptionsValues_ptr, numJitOptions, cylibraryOptions.data(), cylibraryOptionValues_ptr, numLibraryOptions) + finally: + _helper_input_void_ptr_free(&cycodeHelper) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, library) @@ -29249,25 +29331,32 @@ def cuLibraryLoadFromFile(char* fileName, jitOptions : Optional[tuple[CUjit_opti ----- If the library contains managed variables and no device in the system supports managed variables this call is expected to return :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED` """ - libraryOptionValues = [] if libraryOptionValues is None else libraryOptionValues - libraryOptions = [] if libraryOptions is None else libraryOptions - if not all(isinstance(_x, (CUlibraryOption)) for _x in libraryOptions): - raise TypeError("Argument 'libraryOptions' is not instance of type (expected tuple[cydriver.CUlibraryOption] or list[cydriver.CUlibraryOption]") - jitOptionsValues = [] if jitOptionsValues is None else jitOptionsValues + cdef _InputVoidPtrPtrHelper voidStarHelperlibraryOptionValues + cdef void** cylibraryOptionValues_ptr + cdef vector[cydriver.CUlibraryOption] cylibraryOptions + cdef _InputVoidPtrPtrHelper voidStarHelperjitOptionsValues + cdef void** cyjitOptionsValues_ptr + cdef vector[cydriver.CUjit_option] cyjitOptions + cdef CUlibrary library + library = CUlibrary() jitOptions = [] if jitOptions is None else jitOptions if not all(isinstance(_x, (CUjit_option)) for _x in jitOptions): raise TypeError("Argument 'jitOptions' is not instance of type (expected tuple[cydriver.CUjit_option] or list[cydriver.CUjit_option]") - cdef CUlibrary library = CUlibrary() - cdef vector[cydriver.CUjit_option] cyjitOptions = [int(pyjitOptions) for pyjitOptions in (jitOptions)] + cyjitOptions = jitOptions + jitOptionsValues = [] if jitOptionsValues is None else jitOptionsValues pylist = [_HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(jitOptions, jitOptionsValues)] - cdef _InputVoidPtrPtrHelper voidStarHelperjitOptionsValues = _InputVoidPtrPtrHelper(pylist) - cdef void** cyjitOptionsValues_ptr = voidStarHelperjitOptionsValues.cptr + voidStarHelperjitOptionsValues = _InputVoidPtrPtrHelper(pylist) + cyjitOptionsValues_ptr = voidStarHelperjitOptionsValues.cptr if numJitOptions > len(jitOptions): raise RuntimeError("List is too small: " + str(len(jitOptions)) + " < " + str(numJitOptions)) if numJitOptions > len(jitOptionsValues): raise RuntimeError("List is too small: " + str(len(jitOptionsValues)) + " < " + str(numJitOptions)) - cdef vector[cydriver.CUlibraryOption] cylibraryOptions = [int(pylibraryOptions) for pylibraryOptions in (libraryOptions)] + libraryOptions = [] if libraryOptions is None else libraryOptions + if not all(isinstance(_x, (CUlibraryOption)) for _x in libraryOptions): + raise TypeError("Argument 'libraryOptions' is not instance of type (expected tuple[cydriver.CUlibraryOption] or list[cydriver.CUlibraryOption]") + cylibraryOptions = libraryOptions + libraryOptionValues = [] if libraryOptionValues is None else libraryOptionValues pylist = [_HelperCUlibraryOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(libraryOptions, libraryOptionValues)] - cdef _InputVoidPtrPtrHelper voidStarHelperlibraryOptionValues = _InputVoidPtrPtrHelper(pylist) - cdef void** cylibraryOptionValues_ptr = voidStarHelperlibraryOptionValues.cptr + voidStarHelperlibraryOptionValues = _InputVoidPtrPtrHelper(pylist) + cylibraryOptionValues_ptr = voidStarHelperlibraryOptionValues.cptr if numLibraryOptions > len(libraryOptions): raise RuntimeError("List is too small: " + str(len(libraryOptions)) + " < " + str(numLibraryOptions)) if numLibraryOptions > len(libraryOptionValues): raise RuntimeError("List is too small: " + str(len(libraryOptionValues)) + " < " + str(numLibraryOptions)) with nogil: @@ -29341,6 +29430,8 @@ def cuLibraryGetKernel(library, char* name): :py:obj:`~.cuLibraryLoadData`, :py:obj:`~.cuLibraryLoadFromFile`, :py:obj:`~.cuLibraryUnload`, :py:obj:`~.cuKernelGetFunction`, :py:obj:`~.cuLibraryGetModule`, :py:obj:`~.cuModuleGetFunction` """ cdef cydriver.CUlibrary cylibrary + cdef CUkernel pKernel + pKernel = CUkernel() if library is None: plibrary = 0 elif isinstance(library, (CUlibrary,)): @@ -29348,7 +29439,6 @@ def cuLibraryGetKernel(library, char* name): else: plibrary = int(CUlibrary(library)) cylibrary = plibrary - cdef CUkernel pKernel = CUkernel() with nogil: err = cydriver.cuLibraryGetKernel(pKernel._pvt_ptr, cylibrary, name) if err != cydriver.CUDA_SUCCESS: @@ -29377,6 +29467,7 @@ def cuLibraryGetKernelCount(lib): Number of kernels found within the library """ cdef cydriver.CUlibrary cylib + cdef unsigned int count = 0 if lib is None: plib = 0 elif isinstance(lib, (CUlibrary,)): @@ -29384,7 +29475,6 @@ def cuLibraryGetKernelCount(lib): else: plib = int(CUlibrary(lib)) cylib = plib - cdef unsigned int count = 0 with nogil: err = cydriver.cuLibraryGetKernelCount(&count, cylib) if err != cydriver.CUDA_SUCCESS: @@ -29421,25 +29511,27 @@ def cuLibraryEnumerateKernels(unsigned int numKernels, lib): :py:obj:`~.cuLibraryGetKernelCount` """ cdef cydriver.CUlibrary cylib - if lib is None: - plib = 0 - elif isinstance(lib, (CUlibrary,)): - plib = int(lib) - else: - plib = int(CUlibrary(lib)) - cylib = plib cdef cydriver.CUkernel* cykernels = NULL pykernels = [] - if numKernels != 0: - cykernels = calloc(numKernels, sizeof(cydriver.CUkernel)) - if cykernels is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(numKernels) + 'x' + str(sizeof(cydriver.CUkernel))) - with nogil: - err = cydriver.cuLibraryEnumerateKernels(cykernels, numKernels, cylib) - if CUresult(err) == CUresult(0): - pykernels = [CUkernel(init_value=cykernels[idx]) for idx in range(numKernels)] - if cykernels is not NULL: - free(cykernels) + try: + if numKernels != 0: + cykernels = calloc(numKernels, sizeof(cydriver.CUkernel)) + if cykernels is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(numKernels) + 'x' + str(sizeof(cydriver.CUkernel))) + if lib is None: + plib = 0 + elif isinstance(lib, (CUlibrary,)): + plib = int(lib) + else: + plib = int(CUlibrary(lib)) + cylib = plib + with nogil: + err = cydriver.cuLibraryEnumerateKernels(cykernels, numKernels, cylib) + finally: + if CUresult(err) == CUresult(0): + pykernels = [CUkernel(init_value=cykernels[idx]) for idx in range(numKernels)] + if cykernels is not NULL: + free(cykernels) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, pykernels) @@ -29472,6 +29564,8 @@ def cuLibraryGetModule(library): :py:obj:`~.cuLibraryLoadData`, :py:obj:`~.cuLibraryLoadFromFile`, :py:obj:`~.cuLibraryUnload`, :py:obj:`~.cuModuleGetFunction` """ cdef cydriver.CUlibrary cylibrary + cdef CUmodule pMod + pMod = CUmodule() if library is None: plibrary = 0 elif isinstance(library, (CUlibrary,)): @@ -29479,7 +29573,6 @@ def cuLibraryGetModule(library): else: plibrary = int(CUlibrary(library)) cylibrary = plibrary - cdef CUmodule pMod = CUmodule() with nogil: err = cydriver.cuLibraryGetModule(pMod._pvt_ptr, cylibrary) if err != cydriver.CUDA_SUCCESS: @@ -29514,6 +29607,8 @@ def cuKernelGetFunction(kernel): :py:obj:`~.cuLibraryLoadData`, :py:obj:`~.cuLibraryLoadFromFile`, :py:obj:`~.cuLibraryUnload`, :py:obj:`~.cuLibraryGetKernel`, :py:obj:`~.cuLibraryGetModule`, :py:obj:`~.cuModuleGetFunction` """ cdef cydriver.CUkernel cykernel + cdef CUfunction pFunc + pFunc = CUfunction() if kernel is None: pkernel = 0 elif isinstance(kernel, (CUkernel,)): @@ -29521,7 +29616,6 @@ def cuKernelGetFunction(kernel): else: pkernel = int(CUkernel(kernel)) cykernel = pkernel - cdef CUfunction pFunc = CUfunction() with nogil: err = cydriver.cuKernelGetFunction(pFunc._pvt_ptr, cykernel) if err != cydriver.CUDA_SUCCESS: @@ -29555,6 +29649,8 @@ def cuKernelGetLibrary(kernel): :py:obj:`~.cuLibraryLoadData`, :py:obj:`~.cuLibraryLoadFromFile`, :py:obj:`~.cuLibraryUnload`, :py:obj:`~.cuLibraryGetKernel` """ cdef cydriver.CUkernel cykernel + cdef CUlibrary pLib + pLib = CUlibrary() if kernel is None: pkernel = 0 elif isinstance(kernel, (CUkernel,)): @@ -29562,7 +29658,6 @@ def cuKernelGetLibrary(kernel): else: pkernel = int(CUkernel(kernel)) cykernel = pkernel - cdef CUlibrary pLib = CUlibrary() with nogil: err = cydriver.cuKernelGetLibrary(pLib._pvt_ptr, cykernel) if err != cydriver.CUDA_SUCCESS: @@ -29603,6 +29698,9 @@ def cuLibraryGetGlobal(library, char* name): :py:obj:`~.cuLibraryLoadData`, :py:obj:`~.cuLibraryLoadFromFile`, :py:obj:`~.cuLibraryUnload`, :py:obj:`~.cuLibraryGetModule`, :py:obj:`~.cuModuleGetGlobal` """ cdef cydriver.CUlibrary cylibrary + cdef size_t numbytes = 0 + cdef CUdeviceptr dptr + dptr = CUdeviceptr() if library is None: plibrary = 0 elif isinstance(library, (CUlibrary,)): @@ -29610,8 +29708,6 @@ def cuLibraryGetGlobal(library, char* name): else: plibrary = int(CUlibrary(library)) cylibrary = plibrary - cdef CUdeviceptr dptr = CUdeviceptr() - cdef size_t numbytes = 0 with nogil: err = cydriver.cuLibraryGetGlobal(dptr._pvt_ptr, &numbytes, cylibrary, name) if err != cydriver.CUDA_SUCCESS: @@ -29654,6 +29750,9 @@ def cuLibraryGetManaged(library, char* name): :py:obj:`~.cuLibraryLoadData`, :py:obj:`~.cuLibraryLoadFromFile`, :py:obj:`~.cuLibraryUnload` """ cdef cydriver.CUlibrary cylibrary + cdef size_t numbytes = 0 + cdef CUdeviceptr dptr + dptr = CUdeviceptr() if library is None: plibrary = 0 elif isinstance(library, (CUlibrary,)): @@ -29661,8 +29760,6 @@ def cuLibraryGetManaged(library, char* name): else: plibrary = int(CUlibrary(library)) cylibrary = plibrary - cdef CUdeviceptr dptr = CUdeviceptr() - cdef size_t numbytes = 0 with nogil: err = cydriver.cuLibraryGetManaged(dptr._pvt_ptr, &numbytes, cylibrary, name) if err != cydriver.CUDA_SUCCESS: @@ -29702,6 +29799,7 @@ def cuLibraryGetUnifiedFunction(library, char* symbol): :py:obj:`~.cuLibraryLoadData`, :py:obj:`~.cuLibraryLoadFromFile`, :py:obj:`~.cuLibraryUnload` """ cdef cydriver.CUlibrary cylibrary + cdef void_ptr fptr = 0 if library is None: plibrary = 0 elif isinstance(library, (CUlibrary,)): @@ -29709,7 +29807,6 @@ def cuLibraryGetUnifiedFunction(library, char* symbol): else: plibrary = int(CUlibrary(library)) cylibrary = plibrary - cdef void_ptr fptr = 0 with nogil: err = cydriver.cuLibraryGetUnifiedFunction(&fptr, cylibrary, symbol) if err != cydriver.CUDA_SUCCESS: @@ -29828,14 +29925,10 @@ def cuKernelGetAttribute(attrib not None : CUfunction_attribute, kernel, dev): If another thread is trying to set the same attribute on the same device using :py:obj:`~.cuKernelSetAttribute()` simultaneously, the attribute query will give the old or new value depending on the interleavings chosen by the OS scheduler and memory consistency. """ cdef cydriver.CUdevice cydev - if dev is None: - pdev = 0 - elif isinstance(dev, (CUdevice,)): - pdev = int(dev) - else: - pdev = int(CUdevice(dev)) - cydev = pdev cdef cydriver.CUkernel cykernel + cdef cydriver.CUfunction_attribute cyattrib + cdef int pi = 0 + cyattrib = int(attrib) if kernel is None: pkernel = 0 elif isinstance(kernel, (CUkernel,)): @@ -29843,8 +29936,13 @@ def cuKernelGetAttribute(attrib not None : CUfunction_attribute, kernel, dev): else: pkernel = int(CUkernel(kernel)) cykernel = pkernel - cdef int pi = 0 - cdef cydriver.CUfunction_attribute cyattrib = int(attrib) + if dev is None: + pdev = 0 + elif isinstance(dev, (CUdevice,)): + pdev = int(dev) + else: + pdev = int(CUdevice(dev)) + cydev = pdev with nogil: err = cydriver.cuKernelGetAttribute(&pi, cyattrib, cykernel, cydev) if err != cydriver.CUDA_SUCCESS: @@ -29946,14 +30044,9 @@ def cuKernelSetAttribute(attrib not None : CUfunction_attribute, int val, kernel The API has stricter locking requirements in comparison to its legacy counterpart :py:obj:`~.cuFuncSetAttribute()` due to device-wide semantics. If multiple threads are trying to set the same attribute on the same device simultaneously, the attribute setting will depend on the interleavings chosen by the OS scheduler and memory consistency. """ cdef cydriver.CUdevice cydev - if dev is None: - pdev = 0 - elif isinstance(dev, (CUdevice,)): - pdev = int(dev) - else: - pdev = int(CUdevice(dev)) - cydev = pdev cdef cydriver.CUkernel cykernel + cdef cydriver.CUfunction_attribute cyattrib + cyattrib = int(attrib) if kernel is None: pkernel = 0 elif isinstance(kernel, (CUkernel,)): @@ -29961,7 +30054,13 @@ def cuKernelSetAttribute(attrib not None : CUfunction_attribute, int val, kernel else: pkernel = int(CUkernel(kernel)) cykernel = pkernel - cdef cydriver.CUfunction_attribute cyattrib = int(attrib) + if dev is None: + pdev = 0 + elif isinstance(dev, (CUdevice,)): + pdev = int(dev) + else: + pdev = int(CUdevice(dev)) + cydev = pdev with nogil: err = cydriver.cuKernelSetAttribute(cyattrib, val, cykernel, cydev) return (_CUresult(err),) @@ -30030,13 +30129,7 @@ def cuKernelSetCacheConfig(kernel, config not None : CUfunc_cache, dev): The API has stricter locking requirements in comparison to its legacy counterpart :py:obj:`~.cuFuncSetCacheConfig()` due to device-wide semantics. If multiple threads are trying to set a config on the same device simultaneously, the cache config setting will depend on the interleavings chosen by the OS scheduler and memory consistency. """ cdef cydriver.CUdevice cydev - if dev is None: - pdev = 0 - elif isinstance(dev, (CUdevice,)): - pdev = int(dev) - else: - pdev = int(CUdevice(dev)) - cydev = pdev + cdef cydriver.CUfunc_cache cyconfig cdef cydriver.CUkernel cykernel if kernel is None: pkernel = 0 @@ -30045,7 +30138,14 @@ def cuKernelSetCacheConfig(kernel, config not None : CUfunc_cache, dev): else: pkernel = int(CUkernel(kernel)) cykernel = pkernel - cdef cydriver.CUfunc_cache cyconfig = int(config) + cyconfig = int(config) + if dev is None: + pdev = 0 + elif isinstance(dev, (CUdevice,)): + pdev = int(dev) + else: + pdev = int(CUdevice(dev)) + cydev = pdev with nogil: err = cydriver.cuKernelSetCacheConfig(cykernel, cyconfig, cydev) return (_CUresult(err),) @@ -30078,6 +30178,7 @@ def cuKernelGetName(hfunc): The returned name of the function """ cdef cydriver.CUkernel cyhfunc + cdef const char* name = NULL if hfunc is None: phfunc = 0 elif isinstance(hfunc, (CUkernel,)): @@ -30085,7 +30186,6 @@ def cuKernelGetName(hfunc): else: phfunc = int(CUkernel(hfunc)) cyhfunc = phfunc - cdef const char* name = NULL with nogil: err = cydriver.cuKernelGetName(&name, cyhfunc) if err != cydriver.CUDA_SUCCESS: @@ -30131,6 +30231,8 @@ def cuKernelGetParamInfo(kernel, size_t paramIndex): -------- :py:obj:`~.cuFuncGetParamInfo` """ + cdef size_t paramSize = 0 + cdef size_t paramOffset = 0 cdef cydriver.CUkernel cykernel if kernel is None: pkernel = 0 @@ -30139,8 +30241,6 @@ def cuKernelGetParamInfo(kernel, size_t paramIndex): else: pkernel = int(CUkernel(kernel)) cykernel = pkernel - cdef size_t paramOffset = 0 - cdef size_t paramSize = 0 with nogil: err = cydriver.cuKernelGetParamInfo(cykernel, paramIndex, ¶mOffset, ¶mSize) if err != cydriver.CUDA_SUCCESS: @@ -30184,8 +30284,8 @@ def cuMemGetInfo(): -------- :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemGetInfo` """ - cdef size_t free = 0 cdef size_t total = 0 + cdef size_t free = 0 with nogil: err = cydriver.cuMemGetInfo(&free, &total) if err != cydriver.CUDA_SUCCESS: @@ -30221,7 +30321,8 @@ def cuMemAlloc(size_t bytesize): -------- :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMalloc` """ - cdef CUdeviceptr dptr = CUdeviceptr() + cdef CUdeviceptr dptr + dptr = CUdeviceptr() with nogil: err = cydriver.cuMemAlloc(dptr._pvt_ptr, bytesize) if err != cydriver.CUDA_SUCCESS: @@ -30289,8 +30390,9 @@ def cuMemAllocPitch(size_t WidthInBytes, size_t Height, unsigned int ElementSize -------- :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMallocPitch` """ - cdef CUdeviceptr dptr = CUdeviceptr() cdef size_t pPitch = 0 + cdef CUdeviceptr dptr + dptr = CUdeviceptr() with nogil: err = cydriver.cuMemAllocPitch(dptr._pvt_ptr, &pPitch, WidthInBytes, Height, ElementSizeBytes) if err != cydriver.CUDA_SUCCESS: @@ -30376,6 +30478,9 @@ def cuMemGetAddressRange(dptr): :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32` """ cdef cydriver.CUdeviceptr cydptr + cdef size_t psize = 0 + cdef CUdeviceptr pbase + pbase = CUdeviceptr() if dptr is None: pdptr = 0 elif isinstance(dptr, (CUdeviceptr,)): @@ -30383,8 +30488,6 @@ def cuMemGetAddressRange(dptr): else: pdptr = int(CUdeviceptr(dptr)) cydptr = pdptr - cdef CUdeviceptr pbase = CUdeviceptr() - cdef size_t psize = 0 with nogil: err = cydriver.cuMemGetAddressRange(pbase._pvt_ptr, &psize, cydptr) if err != cydriver.CUDA_SUCCESS: @@ -30473,10 +30576,13 @@ def cuMemFreeHost(p): :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaFreeHost` """ cdef _HelperInputVoidPtrStruct cypHelper - cdef void* cyp = _helper_input_void_ptr(p, &cypHelper) - with nogil: - err = cydriver.cuMemFreeHost(cyp) - _helper_input_void_ptr_free(&cypHelper) + cdef void* cyp + try: + cyp = _helper_input_void_ptr(p, &cypHelper) + with nogil: + err = cydriver.cuMemFreeHost(cyp) + finally: + _helper_input_void_ptr_free(&cypHelper) return (_CUresult(err),) {{endif}} @@ -30626,12 +30732,16 @@ def cuMemHostGetDevicePointer(p, unsigned int Flags): -------- :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaHostGetDevicePointer` """ - cdef CUdeviceptr pdptr = CUdeviceptr() cdef _HelperInputVoidPtrStruct cypHelper - cdef void* cyp = _helper_input_void_ptr(p, &cypHelper) - with nogil: - err = cydriver.cuMemHostGetDevicePointer(pdptr._pvt_ptr, cyp, Flags) - _helper_input_void_ptr_free(&cypHelper) + cdef void* cyp + cdef CUdeviceptr pdptr + try: + pdptr = CUdeviceptr() + cyp = _helper_input_void_ptr(p, &cypHelper) + with nogil: + err = cydriver.cuMemHostGetDevicePointer(pdptr._pvt_ptr, cyp, Flags) + finally: + _helper_input_void_ptr_free(&cypHelper) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, pdptr) @@ -30666,12 +30776,15 @@ def cuMemHostGetFlags(p): -------- :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cudaHostGetFlags` """ - cdef unsigned int pFlags = 0 cdef _HelperInputVoidPtrStruct cypHelper - cdef void* cyp = _helper_input_void_ptr(p, &cypHelper) - with nogil: - err = cydriver.cuMemHostGetFlags(&pFlags, cyp) - _helper_input_void_ptr_free(&cypHelper) + cdef void* cyp + cdef unsigned int pFlags = 0 + try: + cyp = _helper_input_void_ptr(p, &cypHelper) + with nogil: + err = cydriver.cuMemHostGetFlags(&pFlags, cyp) + finally: + _helper_input_void_ptr_free(&cypHelper) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, pFlags) @@ -30806,7 +30919,8 @@ def cuMemAllocManaged(size_t bytesize, unsigned int flags): -------- :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuStreamAttachMemAsync`, :py:obj:`~.cudaMallocManaged` """ - cdef CUdeviceptr dptr = CUdeviceptr() + cdef CUdeviceptr dptr + dptr = CUdeviceptr() with nogil: err = cydriver.cuMemAllocManaged(dptr._pvt_ptr, bytesize, flags) if err != cydriver.CUDA_SUCCESS: @@ -30871,40 +30985,43 @@ def cuDeviceRegisterAsyncNotification(device, callbackFunc, userData): -------- :py:obj:`~.cuDeviceUnregisterAsyncNotification` """ - cdef cydriver.CUasyncCallback cycallbackFunc - if callbackFunc is None: - pcallbackFunc = 0 - elif isinstance(callbackFunc, (CUasyncCallback,)): - pcallbackFunc = int(callbackFunc) - else: - pcallbackFunc = int(CUasyncCallback(callbackFunc)) - cycallbackFunc = pcallbackFunc - cdef cydriver.CUdevice cydevice - if device is None: - pdevice = 0 - elif isinstance(device, (CUdevice,)): - pdevice = int(device) - else: - pdevice = int(CUdevice(device)) - cydevice = pdevice + cdef CUasyncCallbackHandle callback cdef _HelperInputVoidPtrStruct cyuserDataHelper - cdef void* cyuserData = _helper_input_void_ptr(userData, &cyuserDataHelper) - + cdef void* cyuserData cdef cuAsyncCallbackData *cbData = NULL - cbData = malloc(sizeof(cbData[0])) - if cbData == NULL: - return (CUresult.CUDA_ERROR_OUT_OF_MEMORY, None) - cbData.callback = cycallbackFunc - cbData.userData = cyuserData - - cdef CUasyncCallbackHandle callback = CUasyncCallbackHandle() - with nogil: - err = cydriver.cuDeviceRegisterAsyncNotification(cydevice, cuAsyncNotificationCallbackWrapper, cbData, callback._pvt_ptr) - if err != cydriver.CUDA_SUCCESS: - free(cbData) - else: - m_global._allocated[int(callback)] = cbData - _helper_input_void_ptr_free(&cyuserDataHelper) + cdef cydriver.CUasyncCallback cycallbackFunc + cdef cydriver.CUdevice cydevice + try: + if device is None: + pdevice = 0 + elif isinstance(device, (CUdevice,)): + pdevice = int(device) + else: + pdevice = int(CUdevice(device)) + cydevice = pdevice + if callbackFunc is None: + pcallbackFunc = 0 + elif isinstance(callbackFunc, (CUasyncCallback,)): + pcallbackFunc = int(callbackFunc) + else: + pcallbackFunc = int(CUasyncCallback(callbackFunc)) + cycallbackFunc = pcallbackFunc + cyuserData = _helper_input_void_ptr(userData, &cyuserDataHelper) + cbData = malloc(sizeof(cbData[0])) + if cbData == NULL: + return (CUresult.CUDA_ERROR_OUT_OF_MEMORY, None) + cbData.callback = cycallbackFunc + cbData.userData = cyuserData + + callback = CUasyncCallbackHandle() + with nogil: + err = cydriver.cuDeviceRegisterAsyncNotification(cydevice, cuAsyncNotificationCallbackWrapper, cbData, callback._pvt_ptr) + finally: + if err != cydriver.CUDA_SUCCESS: + free(cbData) + else: + m_global._allocated[int(callback)] = cbData + _helper_input_void_ptr_free(&cyuserDataHelper) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, callback) @@ -30937,26 +31054,28 @@ def cuDeviceUnregisterAsyncNotification(device, callback): :py:obj:`~.cuDeviceRegisterAsyncNotification` """ cdef cydriver.CUasyncCallbackHandle cycallback - if callback is None: - pcallback = 0 - elif isinstance(callback, (CUasyncCallbackHandle,)): - pcallback = int(callback) - else: - pcallback = int(CUasyncCallbackHandle(callback)) - cycallback = pcallback cdef cydriver.CUdevice cydevice - if device is None: - pdevice = 0 - elif isinstance(device, (CUdevice,)): - pdevice = int(device) - else: - pdevice = int(CUdevice(device)) - cydevice = pdevice - with nogil: - err = cydriver.cuDeviceUnregisterAsyncNotification(cydevice, cycallback) - if err == cydriver.CUDA_SUCCESS: - free(m_global._allocated[pcallback]) - m_global._allocated.erase(pcallback) + try: + if device is None: + pdevice = 0 + elif isinstance(device, (CUdevice,)): + pdevice = int(device) + else: + pdevice = int(CUdevice(device)) + cydevice = pdevice + if callback is None: + pcallback = 0 + elif isinstance(callback, (CUasyncCallbackHandle,)): + pcallback = int(callback) + else: + pcallback = int(CUasyncCallbackHandle(callback)) + cycallback = pcallback + with nogil: + err = cydriver.cuDeviceUnregisterAsyncNotification(cydevice, cycallback) + finally: + if err == cydriver.CUDA_SUCCESS: + free(m_global._allocated[pcallback]) + m_global._allocated.erase(pcallback) return (_CUresult(err),) {{endif}} @@ -30987,7 +31106,8 @@ def cuDeviceGetByPCIBusId(char* pciBusId): -------- :py:obj:`~.cuDeviceGet`, :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetPCIBusId`, :py:obj:`~.cudaDeviceGetByPCIBusId` """ - cdef CUdevice dev = CUdevice() + cdef CUdevice dev + dev = CUdevice() with nogil: err = cydriver.cuDeviceGetByPCIBusId(dev._pvt_ptr, pciBusId) if err != cydriver.CUDA_SUCCESS: @@ -31028,6 +31148,9 @@ def cuDeviceGetPCIBusId(int length, dev): :py:obj:`~.cuDeviceGet`, :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetByPCIBusId`, :py:obj:`~.cudaDeviceGetPCIBusId` """ cdef cydriver.CUdevice cydev + cdef char * pciBusId + pypciBusId = b" " * length + pciBusId = pypciBusId if dev is None: pdev = 0 elif isinstance(dev, (CUdevice,)): @@ -31035,8 +31158,6 @@ def cuDeviceGetPCIBusId(int length, dev): else: pdev = int(CUdevice(dev)) cydev = pdev - pypciBusId = b" " * length - cdef char* pciBusId = pypciBusId with nogil: err = cydriver.cuDeviceGetPCIBusId(pciBusId, length, cydev) if err != cydriver.CUDA_SUCCESS: @@ -31090,6 +31211,8 @@ def cuIpcGetEventHandle(event): :py:obj:`~.cuEventCreate`, :py:obj:`~.cuEventDestroy`, :py:obj:`~.cuEventSynchronize`, :py:obj:`~.cuEventQuery`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuIpcOpenEventHandle`, :py:obj:`~.cuIpcGetMemHandle`, :py:obj:`~.cuIpcOpenMemHandle`, :py:obj:`~.cuIpcCloseMemHandle`, :py:obj:`~.cudaIpcGetEventHandle` """ cdef cydriver.CUevent cyevent + cdef CUipcEventHandle pHandle + pHandle = CUipcEventHandle() if event is None: pevent = 0 elif isinstance(event, (CUevent,)): @@ -31097,7 +31220,6 @@ def cuIpcGetEventHandle(event): else: pevent = int(CUevent(event)) cyevent = pevent - cdef CUipcEventHandle pHandle = CUipcEventHandle() with nogil: err = cydriver.cuIpcGetEventHandle(pHandle._pvt_ptr, cyevent) if err != cydriver.CUDA_SUCCESS: @@ -31144,7 +31266,8 @@ def cuIpcOpenEventHandle(handle not None : CUipcEventHandle): -------- :py:obj:`~.cuEventCreate`, :py:obj:`~.cuEventDestroy`, :py:obj:`~.cuEventSynchronize`, :py:obj:`~.cuEventQuery`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuIpcGetEventHandle`, :py:obj:`~.cuIpcGetMemHandle`, :py:obj:`~.cuIpcOpenMemHandle`, :py:obj:`~.cuIpcCloseMemHandle`, :py:obj:`~.cudaIpcOpenEventHandle` """ - cdef CUevent phEvent = CUevent() + cdef CUevent phEvent + phEvent = CUevent() with nogil: err = cydriver.cuIpcOpenEventHandle(phEvent._pvt_ptr, handle._pvt_ptr[0]) if err != cydriver.CUDA_SUCCESS: @@ -31193,6 +31316,8 @@ def cuIpcGetMemHandle(dptr): :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuIpcGetEventHandle`, :py:obj:`~.cuIpcOpenEventHandle`, :py:obj:`~.cuIpcOpenMemHandle`, :py:obj:`~.cuIpcCloseMemHandle`, :py:obj:`~.cudaIpcGetMemHandle` """ cdef cydriver.CUdeviceptr cydptr + cdef CUipcMemHandle pHandle + pHandle = CUipcMemHandle() if dptr is None: pdptr = 0 elif isinstance(dptr, (CUdeviceptr,)): @@ -31200,7 +31325,6 @@ def cuIpcGetMemHandle(dptr): else: pdptr = int(CUdeviceptr(dptr)) cydptr = pdptr - cdef CUipcMemHandle pHandle = CUipcMemHandle() with nogil: err = cydriver.cuIpcGetMemHandle(pHandle._pvt_ptr, cydptr) if err != cydriver.CUDA_SUCCESS: @@ -31269,7 +31393,8 @@ def cuIpcOpenMemHandle(handle not None : CUipcMemHandle, unsigned int Flags): ----- No guarantees are made about the address returned in `*pdptr`. In particular, multiple processes may not receive the same address for the same `handle`. """ - cdef CUdeviceptr pdptr = CUdeviceptr() + cdef CUdeviceptr pdptr + pdptr = CUdeviceptr() with nogil: err = cydriver.cuIpcOpenMemHandle(pdptr._pvt_ptr, handle._pvt_ptr[0], Flags) if err != cydriver.CUDA_SUCCESS: @@ -31425,10 +31550,13 @@ def cuMemHostRegister(p, size_t bytesize, unsigned int Flags): :py:obj:`~.cuMemHostUnregister`, :py:obj:`~.cuMemHostGetFlags`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cudaHostRegister` """ cdef _HelperInputVoidPtrStruct cypHelper - cdef void* cyp = _helper_input_void_ptr(p, &cypHelper) - with nogil: - err = cydriver.cuMemHostRegister(cyp, bytesize, Flags) - _helper_input_void_ptr_free(&cypHelper) + cdef void* cyp + try: + cyp = _helper_input_void_ptr(p, &cypHelper) + with nogil: + err = cydriver.cuMemHostRegister(cyp, bytesize, Flags) + finally: + _helper_input_void_ptr_free(&cypHelper) return (_CUresult(err),) {{endif}} @@ -31459,10 +31587,13 @@ def cuMemHostUnregister(p): :py:obj:`~.cuMemHostRegister`, :py:obj:`~.cudaHostUnregister` """ cdef _HelperInputVoidPtrStruct cypHelper - cdef void* cyp = _helper_input_void_ptr(p, &cypHelper) - with nogil: - err = cydriver.cuMemHostUnregister(cyp) - _helper_input_void_ptr_free(&cypHelper) + cdef void* cyp + try: + cyp = _helper_input_void_ptr(p, &cypHelper) + with nogil: + err = cydriver.cuMemHostUnregister(cyp) + finally: + _helper_input_void_ptr_free(&cypHelper) return (_CUresult(err),) {{endif}} @@ -31498,13 +31629,6 @@ def cuMemcpy(dst, src, size_t ByteCount): :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol` """ cdef cydriver.CUdeviceptr cysrc - if src is None: - psrc = 0 - elif isinstance(src, (CUdeviceptr,)): - psrc = int(src) - else: - psrc = int(CUdeviceptr(src)) - cysrc = psrc cdef cydriver.CUdeviceptr cydst if dst is None: pdst = 0 @@ -31513,6 +31637,13 @@ def cuMemcpy(dst, src, size_t ByteCount): else: pdst = int(CUdeviceptr(dst)) cydst = pdst + if src is None: + psrc = 0 + elif isinstance(src, (CUdeviceptr,)): + psrc = int(src) + else: + psrc = int(CUdeviceptr(src)) + cysrc = psrc with nogil: err = cydriver.cuMemcpy(cydst, cysrc, ByteCount) return (_CUresult(err),) @@ -31553,29 +31684,8 @@ def cuMemcpyPeer(dstDevice, dstContext, srcDevice, srcContext, size_t ByteCount) :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpy3DPeer`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyPeerAsync`, :py:obj:`~.cuMemcpy3DPeerAsync`, :py:obj:`~.cudaMemcpyPeer` """ cdef cydriver.CUcontext cysrcContext - if srcContext is None: - psrcContext = 0 - elif isinstance(srcContext, (CUcontext,)): - psrcContext = int(srcContext) - else: - psrcContext = int(CUcontext(srcContext)) - cysrcContext = psrcContext cdef cydriver.CUdeviceptr cysrcDevice - if srcDevice is None: - psrcDevice = 0 - elif isinstance(srcDevice, (CUdeviceptr,)): - psrcDevice = int(srcDevice) - else: - psrcDevice = int(CUdeviceptr(srcDevice)) - cysrcDevice = psrcDevice cdef cydriver.CUcontext cydstContext - if dstContext is None: - pdstContext = 0 - elif isinstance(dstContext, (CUcontext,)): - pdstContext = int(dstContext) - else: - pdstContext = int(CUcontext(dstContext)) - cydstContext = pdstContext cdef cydriver.CUdeviceptr cydstDevice if dstDevice is None: pdstDevice = 0 @@ -31584,6 +31694,27 @@ def cuMemcpyPeer(dstDevice, dstContext, srcDevice, srcContext, size_t ByteCount) else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice + if dstContext is None: + pdstContext = 0 + elif isinstance(dstContext, (CUcontext,)): + pdstContext = int(dstContext) + else: + pdstContext = int(CUcontext(dstContext)) + cydstContext = pdstContext + if srcDevice is None: + psrcDevice = 0 + elif isinstance(srcDevice, (CUdeviceptr,)): + psrcDevice = int(srcDevice) + else: + psrcDevice = int(CUdeviceptr(srcDevice)) + cysrcDevice = psrcDevice + if srcContext is None: + psrcContext = 0 + elif isinstance(srcContext, (CUcontext,)): + psrcContext = int(srcContext) + else: + psrcContext = int(CUcontext(srcContext)) + cysrcContext = psrcContext with nogil: err = cydriver.cuMemcpyPeer(cydstDevice, cydstContext, cysrcDevice, cysrcContext, ByteCount) return (_CUresult(err),) @@ -31617,19 +31748,22 @@ def cuMemcpyHtoD(dstDevice, srcHost, size_t ByteCount): -------- :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyToSymbol` """ - cdef cydriver.CUdeviceptr cydstDevice - if dstDevice is None: - pdstDevice = 0 - elif isinstance(dstDevice, (CUdeviceptr,)): - pdstDevice = int(dstDevice) - else: - pdstDevice = int(CUdeviceptr(dstDevice)) - cydstDevice = pdstDevice cdef _HelperInputVoidPtrStruct cysrcHostHelper - cdef void* cysrcHost = _helper_input_void_ptr(srcHost, &cysrcHostHelper) - with nogil: - err = cydriver.cuMemcpyHtoD(cydstDevice, cysrcHost, ByteCount) - _helper_input_void_ptr_free(&cysrcHostHelper) + cdef void* cysrcHost + cdef cydriver.CUdeviceptr cydstDevice + try: + if dstDevice is None: + pdstDevice = 0 + elif isinstance(dstDevice, (CUdeviceptr,)): + pdstDevice = int(dstDevice) + else: + pdstDevice = int(CUdeviceptr(dstDevice)) + cydstDevice = pdstDevice + cysrcHost = _helper_input_void_ptr(srcHost, &cysrcHostHelper) + with nogil: + err = cydriver.cuMemcpyHtoD(cydstDevice, cysrcHost, ByteCount) + finally: + _helper_input_void_ptr_free(&cysrcHostHelper) return (_CUresult(err),) {{endif}} @@ -31662,18 +31796,21 @@ def cuMemcpyDtoH(dstHost, srcDevice, size_t ByteCount): :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyFromSymbol` """ cdef cydriver.CUdeviceptr cysrcDevice - if srcDevice is None: - psrcDevice = 0 - elif isinstance(srcDevice, (CUdeviceptr,)): - psrcDevice = int(srcDevice) - else: - psrcDevice = int(CUdeviceptr(srcDevice)) - cysrcDevice = psrcDevice cdef _HelperInputVoidPtrStruct cydstHostHelper - cdef void* cydstHost = _helper_input_void_ptr(dstHost, &cydstHostHelper) - with nogil: - err = cydriver.cuMemcpyDtoH(cydstHost, cysrcDevice, ByteCount) - _helper_input_void_ptr_free(&cydstHostHelper) + cdef void* cydstHost + try: + cydstHost = _helper_input_void_ptr(dstHost, &cydstHostHelper) + if srcDevice is None: + psrcDevice = 0 + elif isinstance(srcDevice, (CUdeviceptr,)): + psrcDevice = int(srcDevice) + else: + psrcDevice = int(CUdeviceptr(srcDevice)) + cysrcDevice = psrcDevice + with nogil: + err = cydriver.cuMemcpyDtoH(cydstHost, cysrcDevice, ByteCount) + finally: + _helper_input_void_ptr_free(&cydstHostHelper) return (_CUresult(err),) {{endif}} @@ -31706,13 +31843,6 @@ def cuMemcpyDtoD(dstDevice, srcDevice, size_t ByteCount): :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol` """ cdef cydriver.CUdeviceptr cysrcDevice - if srcDevice is None: - psrcDevice = 0 - elif isinstance(srcDevice, (CUdeviceptr,)): - psrcDevice = int(srcDevice) - else: - psrcDevice = int(CUdeviceptr(srcDevice)) - cysrcDevice = psrcDevice cdef cydriver.CUdeviceptr cydstDevice if dstDevice is None: pdstDevice = 0 @@ -31721,6 +31851,13 @@ def cuMemcpyDtoD(dstDevice, srcDevice, size_t ByteCount): else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice + if srcDevice is None: + psrcDevice = 0 + elif isinstance(srcDevice, (CUdeviceptr,)): + psrcDevice = int(srcDevice) + else: + psrcDevice = int(CUdeviceptr(srcDevice)) + cysrcDevice = psrcDevice with nogil: err = cydriver.cuMemcpyDtoD(cydstDevice, cysrcDevice, ByteCount) return (_CUresult(err),) @@ -31758,13 +31895,6 @@ def cuMemcpyDtoA(dstArray, size_t dstOffset, srcDevice, size_t ByteCount): :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpyToArray` """ cdef cydriver.CUdeviceptr cysrcDevice - if srcDevice is None: - psrcDevice = 0 - elif isinstance(srcDevice, (CUdeviceptr,)): - psrcDevice = int(srcDevice) - else: - psrcDevice = int(CUdeviceptr(srcDevice)) - cysrcDevice = psrcDevice cdef cydriver.CUarray cydstArray if dstArray is None: pdstArray = 0 @@ -31773,6 +31903,13 @@ def cuMemcpyDtoA(dstArray, size_t dstOffset, srcDevice, size_t ByteCount): else: pdstArray = int(CUarray(dstArray)) cydstArray = pdstArray + if srcDevice is None: + psrcDevice = 0 + elif isinstance(srcDevice, (CUdeviceptr,)): + psrcDevice = int(srcDevice) + else: + psrcDevice = int(CUdeviceptr(srcDevice)) + cysrcDevice = psrcDevice with nogil: err = cydriver.cuMemcpyDtoA(cydstArray, dstOffset, cysrcDevice, ByteCount) return (_CUresult(err),) @@ -31812,13 +31949,6 @@ def cuMemcpyAtoD(dstDevice, srcArray, size_t srcOffset, size_t ByteCount): :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpyFromArray` """ cdef cydriver.CUarray cysrcArray - if srcArray is None: - psrcArray = 0 - elif isinstance(srcArray, (CUarray,)): - psrcArray = int(srcArray) - else: - psrcArray = int(CUarray(srcArray)) - cysrcArray = psrcArray cdef cydriver.CUdeviceptr cydstDevice if dstDevice is None: pdstDevice = 0 @@ -31827,6 +31957,13 @@ def cuMemcpyAtoD(dstDevice, srcArray, size_t srcOffset, size_t ByteCount): else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice + if srcArray is None: + psrcArray = 0 + elif isinstance(srcArray, (CUarray,)): + psrcArray = int(srcArray) + else: + psrcArray = int(CUarray(srcArray)) + cysrcArray = psrcArray with nogil: err = cydriver.cuMemcpyAtoD(cydstDevice, cysrcArray, srcOffset, ByteCount) return (_CUresult(err),) @@ -31863,19 +32000,22 @@ def cuMemcpyHtoA(dstArray, size_t dstOffset, srcHost, size_t ByteCount): -------- :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpyToArray` """ - cdef cydriver.CUarray cydstArray - if dstArray is None: - pdstArray = 0 - elif isinstance(dstArray, (CUarray,)): - pdstArray = int(dstArray) - else: - pdstArray = int(CUarray(dstArray)) - cydstArray = pdstArray cdef _HelperInputVoidPtrStruct cysrcHostHelper - cdef void* cysrcHost = _helper_input_void_ptr(srcHost, &cysrcHostHelper) - with nogil: - err = cydriver.cuMemcpyHtoA(cydstArray, dstOffset, cysrcHost, ByteCount) - _helper_input_void_ptr_free(&cysrcHostHelper) + cdef void* cysrcHost + cdef cydriver.CUarray cydstArray + try: + if dstArray is None: + pdstArray = 0 + elif isinstance(dstArray, (CUarray,)): + pdstArray = int(dstArray) + else: + pdstArray = int(CUarray(dstArray)) + cydstArray = pdstArray + cysrcHost = _helper_input_void_ptr(srcHost, &cysrcHostHelper) + with nogil: + err = cydriver.cuMemcpyHtoA(cydstArray, dstOffset, cysrcHost, ByteCount) + finally: + _helper_input_void_ptr_free(&cysrcHostHelper) return (_CUresult(err),) {{endif}} @@ -31911,18 +32051,21 @@ def cuMemcpyAtoH(dstHost, srcArray, size_t srcOffset, size_t ByteCount): :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpyFromArray` """ cdef cydriver.CUarray cysrcArray - if srcArray is None: - psrcArray = 0 - elif isinstance(srcArray, (CUarray,)): - psrcArray = int(srcArray) - else: - psrcArray = int(CUarray(srcArray)) - cysrcArray = psrcArray cdef _HelperInputVoidPtrStruct cydstHostHelper - cdef void* cydstHost = _helper_input_void_ptr(dstHost, &cydstHostHelper) - with nogil: - err = cydriver.cuMemcpyAtoH(cydstHost, cysrcArray, srcOffset, ByteCount) - _helper_input_void_ptr_free(&cydstHostHelper) + cdef void* cydstHost + try: + cydstHost = _helper_input_void_ptr(dstHost, &cydstHostHelper) + if srcArray is None: + psrcArray = 0 + elif isinstance(srcArray, (CUarray,)): + psrcArray = int(srcArray) + else: + psrcArray = int(CUarray(srcArray)) + cysrcArray = psrcArray + with nogil: + err = cydriver.cuMemcpyAtoH(cydstHost, cysrcArray, srcOffset, ByteCount) + finally: + _helper_input_void_ptr_free(&cydstHostHelper) return (_CUresult(err),) {{endif}} @@ -31963,13 +32106,6 @@ def cuMemcpyAtoA(dstArray, size_t dstOffset, srcArray, size_t srcOffset, size_t :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpyArrayToArray` """ cdef cydriver.CUarray cysrcArray - if srcArray is None: - psrcArray = 0 - elif isinstance(srcArray, (CUarray,)): - psrcArray = int(srcArray) - else: - psrcArray = int(CUarray(srcArray)) - cysrcArray = psrcArray cdef cydriver.CUarray cydstArray if dstArray is None: pdstArray = 0 @@ -31978,6 +32114,13 @@ def cuMemcpyAtoA(dstArray, size_t dstOffset, srcArray, size_t srcOffset, size_t else: pdstArray = int(CUarray(dstArray)) cydstArray = pdstArray + if srcArray is None: + psrcArray = 0 + elif isinstance(srcArray, (CUarray,)): + psrcArray = int(srcArray) + else: + psrcArray = int(CUarray(srcArray)) + cysrcArray = psrcArray with nogil: err = cydriver.cuMemcpyAtoA(cydstArray, dstOffset, cysrcArray, srcOffset, ByteCount) return (_CUresult(err),) @@ -32104,7 +32247,8 @@ def cuMemcpy2D(pCopy : Optional[CUDA_MEMCPY2D]): -------- :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray` """ - cdef cydriver.CUDA_MEMCPY2D* cypCopy_ptr = pCopy._pvt_ptr if pCopy is not None else NULL + cdef cydriver.CUDA_MEMCPY2D* cypCopy_ptr + cypCopy_ptr = pCopy._pvt_ptr if pCopy is not None else NULL with nogil: err = cydriver.cuMemcpy2D(cypCopy_ptr) return (_CUresult(err),) @@ -32231,7 +32375,8 @@ def cuMemcpy2DUnaligned(pCopy : Optional[CUDA_MEMCPY2D]): -------- :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray` """ - cdef cydriver.CUDA_MEMCPY2D* cypCopy_ptr = pCopy._pvt_ptr if pCopy is not None else NULL + cdef cydriver.CUDA_MEMCPY2D* cypCopy_ptr + cypCopy_ptr = pCopy._pvt_ptr if pCopy is not None else NULL with nogil: err = cydriver.cuMemcpy2DUnaligned(cypCopy_ptr) return (_CUresult(err),) @@ -32361,7 +32506,8 @@ def cuMemcpy3D(pCopy : Optional[CUDA_MEMCPY3D]): -------- :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemcpy3D` """ - cdef cydriver.CUDA_MEMCPY3D* cypCopy_ptr = pCopy._pvt_ptr if pCopy is not None else NULL + cdef cydriver.CUDA_MEMCPY3D* cypCopy_ptr + cypCopy_ptr = pCopy._pvt_ptr if pCopy is not None else NULL with nogil: err = cydriver.cuMemcpy3D(cypCopy_ptr) return (_CUresult(err),) @@ -32391,7 +32537,8 @@ def cuMemcpy3DPeer(pCopy : Optional[CUDA_MEMCPY3D_PEER]): -------- :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyPeer`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyPeerAsync`, :py:obj:`~.cuMemcpy3DPeerAsync`, :py:obj:`~.cudaMemcpy3DPeer` """ - cdef cydriver.CUDA_MEMCPY3D_PEER* cypCopy_ptr = pCopy._pvt_ptr if pCopy is not None else NULL + cdef cydriver.CUDA_MEMCPY3D_PEER* cypCopy_ptr + cypCopy_ptr = pCopy._pvt_ptr if pCopy is not None else NULL with nogil: err = cydriver.cuMemcpy3DPeer(cypCopy_ptr) return (_CUresult(err),) @@ -32431,21 +32578,7 @@ def cuMemcpyAsync(dst, src, size_t ByteCount, hStream): :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync` """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream cdef cydriver.CUdeviceptr cysrc - if src is None: - psrc = 0 - elif isinstance(src, (CUdeviceptr,)): - psrc = int(src) - else: - psrc = int(CUdeviceptr(src)) - cysrc = psrc cdef cydriver.CUdeviceptr cydst if dst is None: pdst = 0 @@ -32454,6 +32587,20 @@ def cuMemcpyAsync(dst, src, size_t ByteCount, hStream): else: pdst = int(CUdeviceptr(dst)) cydst = pdst + if src is None: + psrc = 0 + elif isinstance(src, (CUdeviceptr,)): + psrc = int(src) + else: + psrc = int(CUdeviceptr(src)) + cysrc = psrc + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream with nogil: err = cydriver.cuMemcpyAsync(cydst, cysrc, ByteCount, cyhStream) return (_CUresult(err),) @@ -32496,37 +32643,9 @@ def cuMemcpyPeerAsync(dstDevice, dstContext, srcDevice, srcContext, size_t ByteC :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyPeer`, :py:obj:`~.cuMemcpy3DPeer`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpy3DPeerAsync`, :py:obj:`~.cudaMemcpyPeerAsync` """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream cdef cydriver.CUcontext cysrcContext - if srcContext is None: - psrcContext = 0 - elif isinstance(srcContext, (CUcontext,)): - psrcContext = int(srcContext) - else: - psrcContext = int(CUcontext(srcContext)) - cysrcContext = psrcContext cdef cydriver.CUdeviceptr cysrcDevice - if srcDevice is None: - psrcDevice = 0 - elif isinstance(srcDevice, (CUdeviceptr,)): - psrcDevice = int(srcDevice) - else: - psrcDevice = int(CUdeviceptr(srcDevice)) - cysrcDevice = psrcDevice cdef cydriver.CUcontext cydstContext - if dstContext is None: - pdstContext = 0 - elif isinstance(dstContext, (CUcontext,)): - pdstContext = int(dstContext) - else: - pdstContext = int(CUcontext(dstContext)) - cydstContext = pdstContext cdef cydriver.CUdeviceptr cydstDevice if dstDevice is None: pdstDevice = 0 @@ -32535,6 +32654,34 @@ def cuMemcpyPeerAsync(dstDevice, dstContext, srcDevice, srcContext, size_t ByteC else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice + if dstContext is None: + pdstContext = 0 + elif isinstance(dstContext, (CUcontext,)): + pdstContext = int(dstContext) + else: + pdstContext = int(CUcontext(dstContext)) + cydstContext = pdstContext + if srcDevice is None: + psrcDevice = 0 + elif isinstance(srcDevice, (CUdeviceptr,)): + psrcDevice = int(srcDevice) + else: + psrcDevice = int(CUdeviceptr(srcDevice)) + cysrcDevice = psrcDevice + if srcContext is None: + psrcContext = 0 + elif isinstance(srcContext, (CUcontext,)): + psrcContext = int(srcContext) + else: + psrcContext = int(CUcontext(srcContext)) + cysrcContext = psrcContext + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream with nogil: err = cydriver.cuMemcpyPeerAsync(cydstDevice, cydstContext, cysrcDevice, cysrcContext, ByteCount, cyhStream) return (_CUresult(err),) @@ -32571,26 +32718,29 @@ def cuMemcpyHtoDAsync(dstDevice, srcHost, size_t ByteCount, hStream): :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync` """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream - cdef cydriver.CUdeviceptr cydstDevice - if dstDevice is None: - pdstDevice = 0 - elif isinstance(dstDevice, (CUdeviceptr,)): - pdstDevice = int(dstDevice) - else: - pdstDevice = int(CUdeviceptr(dstDevice)) - cydstDevice = pdstDevice cdef _HelperInputVoidPtrStruct cysrcHostHelper - cdef void* cysrcHost = _helper_input_void_ptr(srcHost, &cysrcHostHelper) - with nogil: - err = cydriver.cuMemcpyHtoDAsync(cydstDevice, cysrcHost, ByteCount, cyhStream) - _helper_input_void_ptr_free(&cysrcHostHelper) + cdef void* cysrcHost + cdef cydriver.CUdeviceptr cydstDevice + try: + if dstDevice is None: + pdstDevice = 0 + elif isinstance(dstDevice, (CUdeviceptr,)): + pdstDevice = int(dstDevice) + else: + pdstDevice = int(CUdeviceptr(dstDevice)) + cydstDevice = pdstDevice + cysrcHost = _helper_input_void_ptr(srcHost, &cysrcHostHelper) + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream + with nogil: + err = cydriver.cuMemcpyHtoDAsync(cydstDevice, cysrcHost, ByteCount, cyhStream) + finally: + _helper_input_void_ptr_free(&cysrcHostHelper) return (_CUresult(err),) {{endif}} @@ -32625,26 +32775,29 @@ def cuMemcpyDtoHAsync(dstHost, srcDevice, size_t ByteCount, hStream): :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync` """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream cdef cydriver.CUdeviceptr cysrcDevice - if srcDevice is None: - psrcDevice = 0 - elif isinstance(srcDevice, (CUdeviceptr,)): - psrcDevice = int(srcDevice) - else: - psrcDevice = int(CUdeviceptr(srcDevice)) - cysrcDevice = psrcDevice cdef _HelperInputVoidPtrStruct cydstHostHelper - cdef void* cydstHost = _helper_input_void_ptr(dstHost, &cydstHostHelper) - with nogil: - err = cydriver.cuMemcpyDtoHAsync(cydstHost, cysrcDevice, ByteCount, cyhStream) - _helper_input_void_ptr_free(&cydstHostHelper) + cdef void* cydstHost + try: + cydstHost = _helper_input_void_ptr(dstHost, &cydstHostHelper) + if srcDevice is None: + psrcDevice = 0 + elif isinstance(srcDevice, (CUdeviceptr,)): + psrcDevice = int(srcDevice) + else: + psrcDevice = int(CUdeviceptr(srcDevice)) + cysrcDevice = psrcDevice + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream + with nogil: + err = cydriver.cuMemcpyDtoHAsync(cydstHost, cysrcDevice, ByteCount, cyhStream) + finally: + _helper_input_void_ptr_free(&cydstHostHelper) return (_CUresult(err),) {{endif}} @@ -32679,21 +32832,7 @@ def cuMemcpyDtoDAsync(dstDevice, srcDevice, size_t ByteCount, hStream): :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync` """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream cdef cydriver.CUdeviceptr cysrcDevice - if srcDevice is None: - psrcDevice = 0 - elif isinstance(srcDevice, (CUdeviceptr,)): - psrcDevice = int(srcDevice) - else: - psrcDevice = int(CUdeviceptr(srcDevice)) - cysrcDevice = psrcDevice cdef cydriver.CUdeviceptr cydstDevice if dstDevice is None: pdstDevice = 0 @@ -32702,6 +32841,20 @@ def cuMemcpyDtoDAsync(dstDevice, srcDevice, size_t ByteCount, hStream): else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice + if srcDevice is None: + psrcDevice = 0 + elif isinstance(srcDevice, (CUdeviceptr,)): + psrcDevice = int(srcDevice) + else: + psrcDevice = int(CUdeviceptr(srcDevice)) + cysrcDevice = psrcDevice + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream with nogil: err = cydriver.cuMemcpyDtoDAsync(cydstDevice, cysrcDevice, ByteCount, cyhStream) return (_CUresult(err),) @@ -32741,26 +32894,29 @@ def cuMemcpyHtoAAsync(dstArray, size_t dstOffset, srcHost, size_t ByteCount, hSt :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemcpyToArrayAsync` """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream - cdef cydriver.CUarray cydstArray - if dstArray is None: - pdstArray = 0 - elif isinstance(dstArray, (CUarray,)): - pdstArray = int(dstArray) - else: - pdstArray = int(CUarray(dstArray)) - cydstArray = pdstArray cdef _HelperInputVoidPtrStruct cysrcHostHelper - cdef void* cysrcHost = _helper_input_void_ptr(srcHost, &cysrcHostHelper) - with nogil: - err = cydriver.cuMemcpyHtoAAsync(cydstArray, dstOffset, cysrcHost, ByteCount, cyhStream) - _helper_input_void_ptr_free(&cysrcHostHelper) + cdef void* cysrcHost + cdef cydriver.CUarray cydstArray + try: + if dstArray is None: + pdstArray = 0 + elif isinstance(dstArray, (CUarray,)): + pdstArray = int(dstArray) + else: + pdstArray = int(CUarray(dstArray)) + cydstArray = pdstArray + cysrcHost = _helper_input_void_ptr(srcHost, &cysrcHostHelper) + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream + with nogil: + err = cydriver.cuMemcpyHtoAAsync(cydstArray, dstOffset, cysrcHost, ByteCount, cyhStream) + finally: + _helper_input_void_ptr_free(&cysrcHostHelper) return (_CUresult(err),) {{endif}} @@ -32798,26 +32954,29 @@ def cuMemcpyAtoHAsync(dstHost, srcArray, size_t srcOffset, size_t ByteCount, hSt :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemcpyFromArrayAsync` """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream cdef cydriver.CUarray cysrcArray - if srcArray is None: - psrcArray = 0 - elif isinstance(srcArray, (CUarray,)): - psrcArray = int(srcArray) - else: - psrcArray = int(CUarray(srcArray)) - cysrcArray = psrcArray cdef _HelperInputVoidPtrStruct cydstHostHelper - cdef void* cydstHost = _helper_input_void_ptr(dstHost, &cydstHostHelper) - with nogil: - err = cydriver.cuMemcpyAtoHAsync(cydstHost, cysrcArray, srcOffset, ByteCount, cyhStream) - _helper_input_void_ptr_free(&cydstHostHelper) + cdef void* cydstHost + try: + cydstHost = _helper_input_void_ptr(dstHost, &cydstHostHelper) + if srcArray is None: + psrcArray = 0 + elif isinstance(srcArray, (CUarray,)): + psrcArray = int(srcArray) + else: + psrcArray = int(CUarray(srcArray)) + cysrcArray = psrcArray + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream + with nogil: + err = cydriver.cuMemcpyAtoHAsync(cydstHost, cysrcArray, srcOffset, ByteCount, cyhStream) + finally: + _helper_input_void_ptr_free(&cydstHostHelper) return (_CUresult(err),) {{endif}} @@ -32952,6 +33111,8 @@ def cuMemcpy2DAsync(pCopy : Optional[CUDA_MEMCPY2D], hStream): :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync` """ cdef cydriver.CUstream cyhStream + cdef cydriver.CUDA_MEMCPY2D* cypCopy_ptr + cypCopy_ptr = pCopy._pvt_ptr if pCopy is not None else NULL if hStream is None: phStream = 0 elif isinstance(hStream, (CUstream,)): @@ -32959,7 +33120,6 @@ def cuMemcpy2DAsync(pCopy : Optional[CUDA_MEMCPY2D], hStream): else: phStream = int(CUstream(hStream)) cyhStream = phStream - cdef cydriver.CUDA_MEMCPY2D* cypCopy_ptr = pCopy._pvt_ptr if pCopy is not None else NULL with nogil: err = cydriver.cuMemcpy2DAsync(cypCopy_ptr, cyhStream) return (_CUresult(err),) @@ -33092,6 +33252,8 @@ def cuMemcpy3DAsync(pCopy : Optional[CUDA_MEMCPY3D], hStream): :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemcpy3DAsync` """ cdef cydriver.CUstream cyhStream + cdef cydriver.CUDA_MEMCPY3D* cypCopy_ptr + cypCopy_ptr = pCopy._pvt_ptr if pCopy is not None else NULL if hStream is None: phStream = 0 elif isinstance(hStream, (CUstream,)): @@ -33099,7 +33261,6 @@ def cuMemcpy3DAsync(pCopy : Optional[CUDA_MEMCPY3D], hStream): else: phStream = int(CUstream(hStream)) cyhStream = phStream - cdef cydriver.CUDA_MEMCPY3D* cypCopy_ptr = pCopy._pvt_ptr if pCopy is not None else NULL with nogil: err = cydriver.cuMemcpy3DAsync(cypCopy_ptr, cyhStream) return (_CUresult(err),) @@ -33132,6 +33293,8 @@ def cuMemcpy3DPeerAsync(pCopy : Optional[CUDA_MEMCPY3D_PEER], hStream): :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyPeer`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyPeerAsync`, :py:obj:`~.cuMemcpy3DPeerAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync` """ cdef cydriver.CUstream cyhStream + cdef cydriver.CUDA_MEMCPY3D_PEER* cypCopy_ptr + cypCopy_ptr = pCopy._pvt_ptr if pCopy is not None else NULL if hStream is None: phStream = 0 elif isinstance(hStream, (CUstream,)): @@ -33139,7 +33302,6 @@ def cuMemcpy3DPeerAsync(pCopy : Optional[CUDA_MEMCPY3D_PEER], hStream): else: phStream = int(CUstream(hStream)) cyhStream = phStream - cdef cydriver.CUDA_MEMCPY3D_PEER* cypCopy_ptr = pCopy._pvt_ptr if pCopy is not None else NULL with nogil: err = cydriver.cuMemcpy3DPeerAsync(cypCopy_ptr, cyhStream) return (_CUresult(err),) @@ -33250,70 +33412,74 @@ def cuMemcpyBatchAsync(dsts : Optional[tuple[CUdeviceptr] | list[CUdeviceptr]], :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE` """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream - if not all(isinstance(_x, (int)) for _x in attrsIdxs): - raise TypeError("Argument 'attrsIdxs' is not instance of type (expected tuple[int] or list[int]") - attrs = [] if attrs is None else attrs - if not all(isinstance(_x, (CUmemcpyAttributes,)) for _x in attrs): - raise TypeError("Argument 'attrs' is not instance of type (expected tuple[cydriver.CUmemcpyAttributes,] or list[cydriver.CUmemcpyAttributes,]") - if not all(isinstance(_x, (int)) for _x in sizes): - raise TypeError("Argument 'sizes' is not instance of type (expected tuple[int] or list[int]") - srcs = [] if srcs is None else srcs - if not all(isinstance(_x, (CUdeviceptr,)) for _x in srcs): - raise TypeError("Argument 'srcs' is not instance of type (expected tuple[cydriver.CUdeviceptr,] or list[cydriver.CUdeviceptr,]") - dsts = [] if dsts is None else dsts - if not all(isinstance(_x, (CUdeviceptr,)) for _x in dsts): - raise TypeError("Argument 'dsts' is not instance of type (expected tuple[cydriver.CUdeviceptr,] or list[cydriver.CUdeviceptr,]") - cdef cydriver.CUdeviceptr* cydsts = NULL - if len(dsts) > 1: - cydsts = calloc(len(dsts), sizeof(cydriver.CUdeviceptr)) - if cydsts is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dsts)) + 'x' + str(sizeof(cydriver.CUdeviceptr))) - else: - for idx in range(len(dsts)): - cydsts[idx] = (dsts[idx])._pvt_ptr[0] - elif len(dsts) == 1: - cydsts = (dsts[0])._pvt_ptr - cdef cydriver.CUdeviceptr* cysrcs = NULL - if len(srcs) > 1: - cysrcs = calloc(len(srcs), sizeof(cydriver.CUdeviceptr)) - if cysrcs is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(srcs)) + 'x' + str(sizeof(cydriver.CUdeviceptr))) - else: - for idx in range(len(srcs)): - cysrcs[idx] = (srcs[idx])._pvt_ptr[0] - elif len(srcs) == 1: - cysrcs = (srcs[0])._pvt_ptr - cdef vector[size_t] cysizes = sizes - if count > len(dsts): raise RuntimeError("List is too small: " + str(len(dsts)) + " < " + str(count)) - if count > len(srcs): raise RuntimeError("List is too small: " + str(len(srcs)) + " < " + str(count)) - if count > len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count)) + cdef vector[size_t] cyattrsIdxs cdef cydriver.CUmemcpyAttributes* cyattrs = NULL - if len(attrs) > 1: - cyattrs = calloc(len(attrs), sizeof(cydriver.CUmemcpyAttributes)) - if cyattrs is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(attrs)) + 'x' + str(sizeof(cydriver.CUmemcpyAttributes))) - for idx in range(len(attrs)): - string.memcpy(&cyattrs[idx], (attrs[idx])._pvt_ptr, sizeof(cydriver.CUmemcpyAttributes)) - elif len(attrs) == 1: - cyattrs = (attrs[0])._pvt_ptr - cdef vector[size_t] cyattrsIdxs = attrsIdxs - if numAttrs > len(attrs): raise RuntimeError("List is too small: " + str(len(attrs)) + " < " + str(numAttrs)) - if numAttrs > len(attrsIdxs): raise RuntimeError("List is too small: " + str(len(attrsIdxs)) + " < " + str(numAttrs)) - with nogil: - err = cydriver.cuMemcpyBatchAsync(cydsts, cysrcs, cysizes.data(), count, cyattrs, cyattrsIdxs.data(), numAttrs, cyhStream) - if len(dsts) > 1 and cydsts is not NULL: - free(cydsts) - if len(srcs) > 1 and cysrcs is not NULL: - free(cysrcs) - if len(attrs) > 1 and cyattrs is not NULL: - free(cyattrs) + cdef vector[size_t] cysizes + cdef cydriver.CUdeviceptr* cysrcs = NULL + cdef cydriver.CUdeviceptr* cydsts = NULL + try: + dsts = [] if dsts is None else dsts + if not all(isinstance(_x, (CUdeviceptr,)) for _x in dsts): + raise TypeError("Argument 'dsts' is not instance of type (expected tuple[cydriver.CUdeviceptr,] or list[cydriver.CUdeviceptr,]") + if len(dsts) > 1: + cydsts = calloc(len(dsts), sizeof(cydriver.CUdeviceptr)) + if cydsts is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dsts)) + 'x' + str(sizeof(cydriver.CUdeviceptr))) + else: + for idx in range(len(dsts)): + cydsts[idx] = (dsts[idx])._pvt_ptr[0] + elif len(dsts) == 1: + cydsts = (dsts[0])._pvt_ptr + srcs = [] if srcs is None else srcs + if not all(isinstance(_x, (CUdeviceptr,)) for _x in srcs): + raise TypeError("Argument 'srcs' is not instance of type (expected tuple[cydriver.CUdeviceptr,] or list[cydriver.CUdeviceptr,]") + if len(srcs) > 1: + cysrcs = calloc(len(srcs), sizeof(cydriver.CUdeviceptr)) + if cysrcs is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(srcs)) + 'x' + str(sizeof(cydriver.CUdeviceptr))) + else: + for idx in range(len(srcs)): + cysrcs[idx] = (srcs[idx])._pvt_ptr[0] + elif len(srcs) == 1: + cysrcs = (srcs[0])._pvt_ptr + if not all(isinstance(_x, (int)) for _x in sizes): + raise TypeError("Argument 'sizes' is not instance of type (expected tuple[int] or list[int]") + cysizes = sizes + if count > len(dsts): raise RuntimeError("List is too small: " + str(len(dsts)) + " < " + str(count)) + if count > len(srcs): raise RuntimeError("List is too small: " + str(len(srcs)) + " < " + str(count)) + if count > len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count)) + attrs = [] if attrs is None else attrs + if not all(isinstance(_x, (CUmemcpyAttributes,)) for _x in attrs): + raise TypeError("Argument 'attrs' is not instance of type (expected tuple[cydriver.CUmemcpyAttributes,] or list[cydriver.CUmemcpyAttributes,]") + if len(attrs) > 1: + cyattrs = calloc(len(attrs), sizeof(cydriver.CUmemcpyAttributes)) + if cyattrs is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(attrs)) + 'x' + str(sizeof(cydriver.CUmemcpyAttributes))) + for idx in range(len(attrs)): + string.memcpy(&cyattrs[idx], (attrs[idx])._pvt_ptr, sizeof(cydriver.CUmemcpyAttributes)) + elif len(attrs) == 1: + cyattrs = (attrs[0])._pvt_ptr + if not all(isinstance(_x, (int)) for _x in attrsIdxs): + raise TypeError("Argument 'attrsIdxs' is not instance of type (expected tuple[int] or list[int]") + cyattrsIdxs = attrsIdxs + if numAttrs > len(attrs): raise RuntimeError("List is too small: " + str(len(attrs)) + " < " + str(numAttrs)) + if numAttrs > len(attrsIdxs): raise RuntimeError("List is too small: " + str(len(attrsIdxs)) + " < " + str(numAttrs)) + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream + with nogil: + err = cydriver.cuMemcpyBatchAsync(cydsts, cysrcs, cysizes.data(), count, cyattrs, cyattrsIdxs.data(), numAttrs, cyhStream) + finally: + if len(dsts) > 1 and cydsts is not NULL: + free(cydsts) + if len(srcs) > 1 and cysrcs is not NULL: + free(cysrcs) + if len(attrs) > 1 and cyattrs is not NULL: + free(cyattrs) return (_CUresult(err),) {{endif}} @@ -33421,30 +33587,32 @@ def cuMemcpy3DBatchAsync(size_t numOps, opList : Optional[tuple[CUDA_MEMCPY3D_BA :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE` """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream - opList = [] if opList is None else opList - if not all(isinstance(_x, (CUDA_MEMCPY3D_BATCH_OP,)) for _x in opList): - raise TypeError("Argument 'opList' is not instance of type (expected tuple[cydriver.CUDA_MEMCPY3D_BATCH_OP,] or list[cydriver.CUDA_MEMCPY3D_BATCH_OP,]") - if numOps > len(opList): raise RuntimeError("List is too small: " + str(len(opList)) + " < " + str(numOps)) cdef cydriver.CUDA_MEMCPY3D_BATCH_OP* cyopList = NULL - if len(opList) > 1: - cyopList = calloc(len(opList), sizeof(cydriver.CUDA_MEMCPY3D_BATCH_OP)) - if cyopList is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(opList)) + 'x' + str(sizeof(cydriver.CUDA_MEMCPY3D_BATCH_OP))) - for idx in range(len(opList)): - string.memcpy(&cyopList[idx], (opList[idx])._pvt_ptr, sizeof(cydriver.CUDA_MEMCPY3D_BATCH_OP)) - elif len(opList) == 1: - cyopList = (opList[0])._pvt_ptr - with nogil: - err = cydriver.cuMemcpy3DBatchAsync(numOps, cyopList, flags, cyhStream) - if len(opList) > 1 and cyopList is not NULL: - free(cyopList) + try: + if numOps > len(opList): raise RuntimeError("List is too small: " + str(len(opList)) + " < " + str(numOps)) + opList = [] if opList is None else opList + if not all(isinstance(_x, (CUDA_MEMCPY3D_BATCH_OP,)) for _x in opList): + raise TypeError("Argument 'opList' is not instance of type (expected tuple[cydriver.CUDA_MEMCPY3D_BATCH_OP,] or list[cydriver.CUDA_MEMCPY3D_BATCH_OP,]") + if len(opList) > 1: + cyopList = calloc(len(opList), sizeof(cydriver.CUDA_MEMCPY3D_BATCH_OP)) + if cyopList is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(opList)) + 'x' + str(sizeof(cydriver.CUDA_MEMCPY3D_BATCH_OP))) + for idx in range(len(opList)): + string.memcpy(&cyopList[idx], (opList[idx])._pvt_ptr, sizeof(cydriver.CUDA_MEMCPY3D_BATCH_OP)) + elif len(opList) == 1: + cyopList = (opList[0])._pvt_ptr + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream + with nogil: + err = cydriver.cuMemcpy3DBatchAsync(numOps, cyopList, flags, cyhStream) + finally: + if len(opList) > 1 and cyopList is not NULL: + free(cyopList) return (_CUresult(err),) {{endif}} @@ -33739,13 +33907,6 @@ def cuMemsetD8Async(dstDevice, unsigned char uc, size_t N, hStream): :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemsetAsync` """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream cdef cydriver.CUdeviceptr cydstDevice if dstDevice is None: pdstDevice = 0 @@ -33754,6 +33915,13 @@ def cuMemsetD8Async(dstDevice, unsigned char uc, size_t N, hStream): else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream with nogil: err = cydriver.cuMemsetD8Async(cydstDevice, uc, N, cyhStream) return (_CUresult(err),) @@ -33789,13 +33957,6 @@ def cuMemsetD16Async(dstDevice, unsigned short us, size_t N, hStream): :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemsetAsync` """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream cdef cydriver.CUdeviceptr cydstDevice if dstDevice is None: pdstDevice = 0 @@ -33804,6 +33965,13 @@ def cuMemsetD16Async(dstDevice, unsigned short us, size_t N, hStream): else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream with nogil: err = cydriver.cuMemsetD16Async(cydstDevice, us, N, cyhStream) return (_CUresult(err),) @@ -33839,13 +34007,6 @@ def cuMemsetD32Async(dstDevice, unsigned int ui, size_t N, hStream): :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMemsetAsync` """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream cdef cydriver.CUdeviceptr cydstDevice if dstDevice is None: pdstDevice = 0 @@ -33854,6 +34015,13 @@ def cuMemsetD32Async(dstDevice, unsigned int ui, size_t N, hStream): else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream with nogil: err = cydriver.cuMemsetD32Async(cydstDevice, ui, N, cyhStream) return (_CUresult(err),) @@ -33896,13 +34064,6 @@ def cuMemsetD2D8Async(dstDevice, size_t dstPitch, unsigned char uc, size_t Width :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemset2DAsync` """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream cdef cydriver.CUdeviceptr cydstDevice if dstDevice is None: pdstDevice = 0 @@ -33911,6 +34072,13 @@ def cuMemsetD2D8Async(dstDevice, size_t dstPitch, unsigned char uc, size_t Width else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream with nogil: err = cydriver.cuMemsetD2D8Async(cydstDevice, dstPitch, uc, Width, Height, cyhStream) return (_CUresult(err),) @@ -33954,13 +34122,6 @@ def cuMemsetD2D16Async(dstDevice, size_t dstPitch, unsigned short us, size_t Wid :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD2D32Async`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemset2DAsync` """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream cdef cydriver.CUdeviceptr cydstDevice if dstDevice is None: pdstDevice = 0 @@ -33969,6 +34130,13 @@ def cuMemsetD2D16Async(dstDevice, size_t dstPitch, unsigned short us, size_t Wid else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream with nogil: err = cydriver.cuMemsetD2D16Async(cydstDevice, dstPitch, us, Width, Height, cyhStream) return (_CUresult(err),) @@ -34012,13 +34180,6 @@ def cuMemsetD2D32Async(dstDevice, size_t dstPitch, unsigned int ui, size_t Width :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cuMemsetD32Async`, :py:obj:`~.cudaMemset2DAsync` """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream cdef cydriver.CUdeviceptr cydstDevice if dstDevice is None: pdstDevice = 0 @@ -34027,6 +34188,13 @@ def cuMemsetD2D32Async(dstDevice, size_t dstPitch, unsigned int ui, size_t Width else: pdstDevice = int(CUdeviceptr(dstDevice)) cydstDevice = pdstDevice + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream with nogil: err = cydriver.cuMemsetD2D32Async(cydstDevice, dstPitch, ui, Width, Height, cyhStream) return (_CUresult(err),) @@ -34094,8 +34262,10 @@ def cuArrayCreate(pAllocateArray : Optional[CUDA_ARRAY_DESCRIPTOR]): -------- :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMallocArray` """ - cdef CUarray pHandle = CUarray() - cdef cydriver.CUDA_ARRAY_DESCRIPTOR* cypAllocateArray_ptr = pAllocateArray._pvt_ptr if pAllocateArray is not None else NULL + cdef cydriver.CUDA_ARRAY_DESCRIPTOR* cypAllocateArray_ptr + cdef CUarray pHandle + pHandle = CUarray() + cypAllocateArray_ptr = pAllocateArray._pvt_ptr if pAllocateArray is not None else NULL with nogil: err = cydriver.cuArrayCreate(pHandle._pvt_ptr, cypAllocateArray_ptr) if err != cydriver.CUDA_SUCCESS: @@ -34131,6 +34301,8 @@ def cuArrayGetDescriptor(hArray): :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaArrayGetInfo` """ cdef cydriver.CUarray cyhArray + cdef CUDA_ARRAY_DESCRIPTOR pArrayDescriptor + pArrayDescriptor = CUDA_ARRAY_DESCRIPTOR() if hArray is None: phArray = 0 elif isinstance(hArray, (CUarray,)): @@ -34138,7 +34310,6 @@ def cuArrayGetDescriptor(hArray): else: phArray = int(CUarray(hArray)) cyhArray = phArray - cdef CUDA_ARRAY_DESCRIPTOR pArrayDescriptor = CUDA_ARRAY_DESCRIPTOR() with nogil: err = cydriver.cuArrayGetDescriptor(pArrayDescriptor._pvt_ptr, cyhArray) if err != cydriver.CUDA_SUCCESS: @@ -34187,6 +34358,8 @@ def cuArrayGetSparseProperties(array): :py:obj:`~.cuMipmappedArrayGetSparseProperties`, :py:obj:`~.cuMemMapArrayAsync` """ cdef cydriver.CUarray cyarray + cdef CUDA_ARRAY_SPARSE_PROPERTIES sparseProperties + sparseProperties = CUDA_ARRAY_SPARSE_PROPERTIES() if array is None: parray = 0 elif isinstance(array, (CUarray,)): @@ -34194,7 +34367,6 @@ def cuArrayGetSparseProperties(array): else: parray = int(CUarray(array)) cyarray = parray - cdef CUDA_ARRAY_SPARSE_PROPERTIES sparseProperties = CUDA_ARRAY_SPARSE_PROPERTIES() with nogil: err = cydriver.cuArrayGetSparseProperties(sparseProperties._pvt_ptr, cyarray) if err != cydriver.CUDA_SUCCESS: @@ -34245,6 +34417,8 @@ def cuMipmappedArrayGetSparseProperties(mipmap): :py:obj:`~.cuArrayGetSparseProperties`, :py:obj:`~.cuMemMapArrayAsync` """ cdef cydriver.CUmipmappedArray cymipmap + cdef CUDA_ARRAY_SPARSE_PROPERTIES sparseProperties + sparseProperties = CUDA_ARRAY_SPARSE_PROPERTIES() if mipmap is None: pmipmap = 0 elif isinstance(mipmap, (CUmipmappedArray,)): @@ -34252,7 +34426,6 @@ def cuMipmappedArrayGetSparseProperties(mipmap): else: pmipmap = int(CUmipmappedArray(mipmap)) cymipmap = pmipmap - cdef CUDA_ARRAY_SPARSE_PROPERTIES sparseProperties = CUDA_ARRAY_SPARSE_PROPERTIES() with nogil: err = cydriver.cuMipmappedArrayGetSparseProperties(sparseProperties._pvt_ptr, cymipmap) if err != cydriver.CUDA_SUCCESS: @@ -34295,14 +34468,9 @@ def cuArrayGetMemoryRequirements(array, device): :py:obj:`~.cuMipmappedArrayGetMemoryRequirements`, :py:obj:`~.cuMemMapArrayAsync` """ cdef cydriver.CUdevice cydevice - if device is None: - pdevice = 0 - elif isinstance(device, (CUdevice,)): - pdevice = int(device) - else: - pdevice = int(CUdevice(device)) - cydevice = pdevice cdef cydriver.CUarray cyarray + cdef CUDA_ARRAY_MEMORY_REQUIREMENTS memoryRequirements + memoryRequirements = CUDA_ARRAY_MEMORY_REQUIREMENTS() if array is None: parray = 0 elif isinstance(array, (CUarray,)): @@ -34310,7 +34478,13 @@ def cuArrayGetMemoryRequirements(array, device): else: parray = int(CUarray(array)) cyarray = parray - cdef CUDA_ARRAY_MEMORY_REQUIREMENTS memoryRequirements = CUDA_ARRAY_MEMORY_REQUIREMENTS() + if device is None: + pdevice = 0 + elif isinstance(device, (CUdevice,)): + pdevice = int(device) + else: + pdevice = int(CUdevice(device)) + cydevice = pdevice with nogil: err = cydriver.cuArrayGetMemoryRequirements(memoryRequirements._pvt_ptr, cyarray, cydevice) if err != cydriver.CUDA_SUCCESS: @@ -34354,14 +34528,9 @@ def cuMipmappedArrayGetMemoryRequirements(mipmap, device): :py:obj:`~.cuArrayGetMemoryRequirements`, :py:obj:`~.cuMemMapArrayAsync` """ cdef cydriver.CUdevice cydevice - if device is None: - pdevice = 0 - elif isinstance(device, (CUdevice,)): - pdevice = int(device) - else: - pdevice = int(CUdevice(device)) - cydevice = pdevice cdef cydriver.CUmipmappedArray cymipmap + cdef CUDA_ARRAY_MEMORY_REQUIREMENTS memoryRequirements + memoryRequirements = CUDA_ARRAY_MEMORY_REQUIREMENTS() if mipmap is None: pmipmap = 0 elif isinstance(mipmap, (CUmipmappedArray,)): @@ -34369,7 +34538,13 @@ def cuMipmappedArrayGetMemoryRequirements(mipmap, device): else: pmipmap = int(CUmipmappedArray(mipmap)) cymipmap = pmipmap - cdef CUDA_ARRAY_MEMORY_REQUIREMENTS memoryRequirements = CUDA_ARRAY_MEMORY_REQUIREMENTS() + if device is None: + pdevice = 0 + elif isinstance(device, (CUdevice,)): + pdevice = int(device) + else: + pdevice = int(CUdevice(device)) + cydevice = pdevice with nogil: err = cydriver.cuMipmappedArrayGetMemoryRequirements(memoryRequirements._pvt_ptr, cymipmap, cydevice) if err != cydriver.CUDA_SUCCESS: @@ -34418,6 +34593,8 @@ def cuArrayGetPlane(hArray, unsigned int planeIdx): :py:obj:`~.cuArrayCreate`, :py:obj:`~.cudaArrayGetPlane` """ cdef cydriver.CUarray cyhArray + cdef CUarray pPlaneArray + pPlaneArray = CUarray() if hArray is None: phArray = 0 elif isinstance(hArray, (CUarray,)): @@ -34425,7 +34602,6 @@ def cuArrayGetPlane(hArray, unsigned int planeIdx): else: phArray = int(CUarray(hArray)) cyhArray = phArray - cdef CUarray pPlaneArray = CUarray() with nogil: err = cydriver.cuArrayGetPlane(pPlaneArray._pvt_ptr, cyhArray, planeIdx) if err != cydriver.CUDA_SUCCESS: @@ -34594,8 +34770,10 @@ def cuArray3DCreate(pAllocateArray : Optional[CUDA_ARRAY3D_DESCRIPTOR]): -------- :py:obj:`~.cuArray3DGetDescriptor`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaMalloc3DArray` """ - cdef CUarray pHandle = CUarray() - cdef cydriver.CUDA_ARRAY3D_DESCRIPTOR* cypAllocateArray_ptr = pAllocateArray._pvt_ptr if pAllocateArray is not None else NULL + cdef cydriver.CUDA_ARRAY3D_DESCRIPTOR* cypAllocateArray_ptr + cdef CUarray pHandle + pHandle = CUarray() + cypAllocateArray_ptr = pAllocateArray._pvt_ptr if pAllocateArray is not None else NULL with nogil: err = cydriver.cuArray3DCreate(pHandle._pvt_ptr, cypAllocateArray_ptr) if err != cydriver.CUDA_SUCCESS: @@ -34635,6 +34813,8 @@ def cuArray3DGetDescriptor(hArray): :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArrayDestroy`, :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemAllocPitch`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DAsync`, :py:obj:`~.cuMemcpy2DUnaligned`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuMemcpy3DAsync`, :py:obj:`~.cuMemcpyAtoA`, :py:obj:`~.cuMemcpyAtoD`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpyDtoA`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpyDtoDAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemGetAddressRange`, :py:obj:`~.cuMemGetInfo`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`, :py:obj:`~.cudaArrayGetInfo` """ cdef cydriver.CUarray cyhArray + cdef CUDA_ARRAY3D_DESCRIPTOR pArrayDescriptor + pArrayDescriptor = CUDA_ARRAY3D_DESCRIPTOR() if hArray is None: phArray = 0 elif isinstance(hArray, (CUarray,)): @@ -34642,7 +34822,6 @@ def cuArray3DGetDescriptor(hArray): else: phArray = int(CUarray(hArray)) cyhArray = phArray - cdef CUDA_ARRAY3D_DESCRIPTOR pArrayDescriptor = CUDA_ARRAY3D_DESCRIPTOR() with nogil: err = cydriver.cuArray3DGetDescriptor(pArrayDescriptor._pvt_ptr, cyhArray) if err != cydriver.CUDA_SUCCESS: @@ -34764,8 +34943,10 @@ def cuMipmappedArrayCreate(pMipmappedArrayDesc : Optional[CUDA_ARRAY3D_DESCRIPTO -------- :py:obj:`~.cuMipmappedArrayDestroy`, :py:obj:`~.cuMipmappedArrayGetLevel`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cudaMallocMipmappedArray` """ - cdef CUmipmappedArray pHandle = CUmipmappedArray() - cdef cydriver.CUDA_ARRAY3D_DESCRIPTOR* cypMipmappedArrayDesc_ptr = pMipmappedArrayDesc._pvt_ptr if pMipmappedArrayDesc is not None else NULL + cdef cydriver.CUDA_ARRAY3D_DESCRIPTOR* cypMipmappedArrayDesc_ptr + cdef CUmipmappedArray pHandle + pHandle = CUmipmappedArray() + cypMipmappedArrayDesc_ptr = pMipmappedArrayDesc._pvt_ptr if pMipmappedArrayDesc is not None else NULL with nogil: err = cydriver.cuMipmappedArrayCreate(pHandle._pvt_ptr, cypMipmappedArrayDesc_ptr, numMipmapLevels) if err != cydriver.CUDA_SUCCESS: @@ -34804,6 +34985,8 @@ def cuMipmappedArrayGetLevel(hMipmappedArray, unsigned int level): :py:obj:`~.cuMipmappedArrayCreate`, :py:obj:`~.cuMipmappedArrayDestroy`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cudaGetMipmappedArrayLevel` """ cdef cydriver.CUmipmappedArray cyhMipmappedArray + cdef CUarray pLevelArray + pLevelArray = CUarray() if hMipmappedArray is None: phMipmappedArray = 0 elif isinstance(hMipmappedArray, (CUmipmappedArray,)): @@ -34811,7 +34994,6 @@ def cuMipmappedArrayGetLevel(hMipmappedArray, unsigned int level): else: phMipmappedArray = int(CUmipmappedArray(hMipmappedArray)) cyhMipmappedArray = phMipmappedArray - cdef CUarray pLevelArray = CUarray() with nogil: err = cydriver.cuMipmappedArrayGetLevel(pLevelArray._pvt_ptr, cyhMipmappedArray, level) if err != cydriver.CUDA_SUCCESS: @@ -34918,7 +35100,10 @@ def cuMemGetHandleForAddressRange(dptr, size_t size, handleType not None : CUmem handle : Any Pointer to the location where the returned handle will be stored. """ + cdef cydriver.CUmemRangeHandleType cyhandleType cdef cydriver.CUdeviceptr cydptr + cdef int handle = 0 + cdef void* cyhandle_ptr = &handle if dptr is None: pdptr = 0 elif isinstance(dptr, (CUdeviceptr,)): @@ -34926,9 +35111,7 @@ def cuMemGetHandleForAddressRange(dptr, size_t size, handleType not None : CUmem else: pdptr = int(CUdeviceptr(dptr)) cydptr = pdptr - cdef int handle = 0 - cdef void* cyhandle_ptr = &handle - cdef cydriver.CUmemRangeHandleType cyhandleType = int(handleType) + cyhandleType = int(handleType) with nogil: err = cydriver.cuMemGetHandleForAddressRange(cyhandle_ptr, cydptr, size, cyhandleType, flags) if err != cydriver.CUDA_SUCCESS: @@ -35005,6 +35188,9 @@ def cuMemBatchDecompressAsync(paramsArray : Optional[CUmemDecompressParams], siz :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemPoolCreate`, :py:obj:`~.cuMemAllocFromPoolAsync` """ cdef cydriver.CUstream cystream + cdef size_t errorIndex = 0 + cdef cydriver.CUmemDecompressParams* cyparamsArray_ptr + cyparamsArray_ptr = paramsArray._pvt_ptr if paramsArray is not None else NULL if stream is None: pstream = 0 elif isinstance(stream, (CUstream,)): @@ -35012,8 +35198,6 @@ def cuMemBatchDecompressAsync(paramsArray : Optional[CUmemDecompressParams], siz else: pstream = int(CUstream(stream)) cystream = pstream - cdef cydriver.CUmemDecompressParams* cyparamsArray_ptr = paramsArray._pvt_ptr if paramsArray is not None else NULL - cdef size_t errorIndex = 0 with nogil: err = cydriver.cuMemBatchDecompressAsync(cyparamsArray_ptr, count, flags, &errorIndex, cystream) if err != cydriver.CUDA_SUCCESS: @@ -35059,6 +35243,8 @@ def cuMemAddressReserve(size_t size, size_t alignment, addr, unsigned long long :py:obj:`~.cuMemAddressFree` """ cdef cydriver.CUdeviceptr cyaddr + cdef CUdeviceptr ptr + ptr = CUdeviceptr() if addr is None: paddr = 0 elif isinstance(addr, (CUdeviceptr,)): @@ -35066,7 +35252,6 @@ def cuMemAddressReserve(size_t size, size_t alignment, addr, unsigned long long else: paddr = int(CUdeviceptr(addr)) cyaddr = paddr - cdef CUdeviceptr ptr = CUdeviceptr() with nogil: err = cydriver.cuMemAddressReserve(ptr._pvt_ptr, size, alignment, cyaddr, flags) if err != cydriver.CUDA_SUCCESS: @@ -35197,8 +35382,10 @@ def cuMemCreate(size_t size, prop : Optional[CUmemAllocationProp], unsigned long -------- :py:obj:`~.cuMemRelease`, :py:obj:`~.cuMemExportToShareableHandle`, :py:obj:`~.cuMemImportFromShareableHandle` """ - cdef CUmemGenericAllocationHandle handle = CUmemGenericAllocationHandle() - cdef cydriver.CUmemAllocationProp* cyprop_ptr = prop._pvt_ptr if prop is not None else NULL + cdef cydriver.CUmemAllocationProp* cyprop_ptr + cdef CUmemGenericAllocationHandle handle + handle = CUmemGenericAllocationHandle() + cyprop_ptr = prop._pvt_ptr if prop is not None else NULL with nogil: err = cydriver.cuMemCreate(handle._pvt_ptr, size, cyprop_ptr, flags) if err != cydriver.CUDA_SUCCESS: @@ -35313,13 +35500,6 @@ def cuMemMap(ptr, size_t size, size_t offset, handle, unsigned long long flags): :py:obj:`~.cuMemUnmap`, :py:obj:`~.cuMemSetAccess`, :py:obj:`~.cuMemCreate`, :py:obj:`~.cuMemAddressReserve`, :py:obj:`~.cuMemImportFromShareableHandle` """ cdef cydriver.CUmemGenericAllocationHandle cyhandle - if handle is None: - phandle = 0 - elif isinstance(handle, (CUmemGenericAllocationHandle,)): - phandle = int(handle) - else: - phandle = int(CUmemGenericAllocationHandle(handle)) - cyhandle = phandle cdef cydriver.CUdeviceptr cyptr if ptr is None: pptr = 0 @@ -35328,6 +35508,13 @@ def cuMemMap(ptr, size_t size, size_t offset, handle, unsigned long long flags): else: pptr = int(CUdeviceptr(ptr)) cyptr = pptr + if handle is None: + phandle = 0 + elif isinstance(handle, (CUmemGenericAllocationHandle,)): + phandle = int(handle) + else: + phandle = int(CUmemGenericAllocationHandle(handle)) + cyhandle = phandle with nogil: err = cydriver.cuMemMap(cyptr, size, offset, cyhandle, flags) return (_CUresult(err),) @@ -35480,30 +35667,32 @@ def cuMemMapArrayAsync(mapInfoList : Optional[tuple[CUarrayMapInfo] | list[CUarr :py:obj:`~.cuMipmappedArrayCreate`, :py:obj:`~.cuArrayCreate`, :py:obj:`~.cuArray3DCreate`, :py:obj:`~.cuMemCreate`, :py:obj:`~.cuArrayGetSparseProperties`, :py:obj:`~.cuMipmappedArrayGetSparseProperties` """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream - mapInfoList = [] if mapInfoList is None else mapInfoList - if not all(isinstance(_x, (CUarrayMapInfo,)) for _x in mapInfoList): - raise TypeError("Argument 'mapInfoList' is not instance of type (expected tuple[cydriver.CUarrayMapInfo,] or list[cydriver.CUarrayMapInfo,]") cdef cydriver.CUarrayMapInfo* cymapInfoList = NULL - if len(mapInfoList) > 1: - cymapInfoList = calloc(len(mapInfoList), sizeof(cydriver.CUarrayMapInfo)) - if cymapInfoList is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(mapInfoList)) + 'x' + str(sizeof(cydriver.CUarrayMapInfo))) - for idx in range(len(mapInfoList)): - string.memcpy(&cymapInfoList[idx], (mapInfoList[idx])._pvt_ptr, sizeof(cydriver.CUarrayMapInfo)) - elif len(mapInfoList) == 1: - cymapInfoList = (mapInfoList[0])._pvt_ptr - if count > len(mapInfoList): raise RuntimeError("List is too small: " + str(len(mapInfoList)) + " < " + str(count)) - with nogil: - err = cydriver.cuMemMapArrayAsync(cymapInfoList, count, cyhStream) - if len(mapInfoList) > 1 and cymapInfoList is not NULL: - free(cymapInfoList) + try: + mapInfoList = [] if mapInfoList is None else mapInfoList + if not all(isinstance(_x, (CUarrayMapInfo,)) for _x in mapInfoList): + raise TypeError("Argument 'mapInfoList' is not instance of type (expected tuple[cydriver.CUarrayMapInfo,] or list[cydriver.CUarrayMapInfo,]") + if len(mapInfoList) > 1: + cymapInfoList = calloc(len(mapInfoList), sizeof(cydriver.CUarrayMapInfo)) + if cymapInfoList is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(mapInfoList)) + 'x' + str(sizeof(cydriver.CUarrayMapInfo))) + for idx in range(len(mapInfoList)): + string.memcpy(&cymapInfoList[idx], (mapInfoList[idx])._pvt_ptr, sizeof(cydriver.CUarrayMapInfo)) + elif len(mapInfoList) == 1: + cymapInfoList = (mapInfoList[0])._pvt_ptr + if count > len(mapInfoList): raise RuntimeError("List is too small: " + str(len(mapInfoList)) + " < " + str(count)) + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream + with nogil: + err = cydriver.cuMemMapArrayAsync(cymapInfoList, count, cyhStream) + finally: + if len(mapInfoList) > 1 and cymapInfoList is not NULL: + free(cymapInfoList) return (_CUresult(err),) {{endif}} @@ -35600,31 +35789,33 @@ def cuMemSetAccess(ptr, size_t size, desc : Optional[tuple[CUmemAccessDesc] | li -------- :py:obj:`~.cuMemSetAccess`, :py:obj:`~.cuMemCreate`, :py:obj:`~.py`:obj:`~.cuMemMap` """ - desc = [] if desc is None else desc - if not all(isinstance(_x, (CUmemAccessDesc,)) for _x in desc): - raise TypeError("Argument 'desc' is not instance of type (expected tuple[cydriver.CUmemAccessDesc,] or list[cydriver.CUmemAccessDesc,]") - cdef cydriver.CUdeviceptr cyptr - if ptr is None: - pptr = 0 - elif isinstance(ptr, (CUdeviceptr,)): - pptr = int(ptr) - else: - pptr = int(CUdeviceptr(ptr)) - cyptr = pptr cdef cydriver.CUmemAccessDesc* cydesc = NULL - if len(desc) > 1: - cydesc = calloc(len(desc), sizeof(cydriver.CUmemAccessDesc)) - if cydesc is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(desc)) + 'x' + str(sizeof(cydriver.CUmemAccessDesc))) - for idx in range(len(desc)): - string.memcpy(&cydesc[idx], (desc[idx])._pvt_ptr, sizeof(cydriver.CUmemAccessDesc)) - elif len(desc) == 1: - cydesc = (desc[0])._pvt_ptr - if count > len(desc): raise RuntimeError("List is too small: " + str(len(desc)) + " < " + str(count)) - with nogil: - err = cydriver.cuMemSetAccess(cyptr, size, cydesc, count) - if len(desc) > 1 and cydesc is not NULL: - free(cydesc) + cdef cydriver.CUdeviceptr cyptr + try: + if ptr is None: + pptr = 0 + elif isinstance(ptr, (CUdeviceptr,)): + pptr = int(ptr) + else: + pptr = int(CUdeviceptr(ptr)) + cyptr = pptr + desc = [] if desc is None else desc + if not all(isinstance(_x, (CUmemAccessDesc,)) for _x in desc): + raise TypeError("Argument 'desc' is not instance of type (expected tuple[cydriver.CUmemAccessDesc,] or list[cydriver.CUmemAccessDesc,]") + if len(desc) > 1: + cydesc = calloc(len(desc), sizeof(cydriver.CUmemAccessDesc)) + if cydesc is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(desc)) + 'x' + str(sizeof(cydriver.CUmemAccessDesc))) + for idx in range(len(desc)): + string.memcpy(&cydesc[idx], (desc[idx])._pvt_ptr, sizeof(cydriver.CUmemAccessDesc)) + elif len(desc) == 1: + cydesc = (desc[0])._pvt_ptr + if count > len(desc): raise RuntimeError("List is too small: " + str(len(desc)) + " < " + str(count)) + with nogil: + err = cydriver.cuMemSetAccess(cyptr, size, cydesc, count) + finally: + if len(desc) > 1 and cydesc is not NULL: + free(cydesc) return (_CUresult(err),) {{endif}} @@ -35653,6 +35844,9 @@ def cuMemGetAccess(location : Optional[CUmemLocation], ptr): :py:obj:`~.cuMemSetAccess` """ cdef cydriver.CUdeviceptr cyptr + cdef cydriver.CUmemLocation* cylocation_ptr + cdef unsigned long long flags = 0 + cylocation_ptr = location._pvt_ptr if location is not None else NULL if ptr is None: pptr = 0 elif isinstance(ptr, (CUdeviceptr,)): @@ -35660,8 +35854,6 @@ def cuMemGetAccess(location : Optional[CUmemLocation], ptr): else: pptr = int(CUdeviceptr(ptr)) cyptr = pptr - cdef unsigned long long flags = 0 - cdef cydriver.CUmemLocation* cylocation_ptr = location._pvt_ptr if location is not None else NULL with nogil: err = cydriver.cuMemGetAccess(&flags, cylocation_ptr, cyptr) if err != cydriver.CUDA_SUCCESS: @@ -35711,7 +35903,12 @@ def cuMemExportToShareableHandle(handle, handleType not None : CUmemAllocationHa -------- :py:obj:`~.cuMemImportFromShareableHandle` """ + cdef cydriver.CUmemAllocationHandleType cyhandleType cdef cydriver.CUmemGenericAllocationHandle cyhandle + cdef _HelperCUmemAllocationHandleType cyshareableHandle + cdef void* cyshareableHandle_ptr + cyshareableHandle = _HelperCUmemAllocationHandleType(handleType) + cyshareableHandle_ptr = cyshareableHandle.cptr if handle is None: phandle = 0 elif isinstance(handle, (CUmemGenericAllocationHandle,)): @@ -35719,9 +35916,7 @@ def cuMemExportToShareableHandle(handle, handleType not None : CUmemAllocationHa else: phandle = int(CUmemGenericAllocationHandle(handle)) cyhandle = phandle - cdef _HelperCUmemAllocationHandleType cyshareableHandle = _HelperCUmemAllocationHandleType(handleType) - cdef void* cyshareableHandle_ptr = cyshareableHandle.cptr - cdef cydriver.CUmemAllocationHandleType cyhandleType = int(handleType) + cyhandleType = int(handleType) with nogil: err = cydriver.cuMemExportToShareableHandle(cyshareableHandle_ptr, cyhandle, cyhandleType, flags) if err != cydriver.CUDA_SUCCESS: @@ -35768,13 +35963,18 @@ def cuMemImportFromShareableHandle(osHandle, shHandleType not None : CUmemAlloca ----- Importing shareable handles exported from some graphics APIs(VUlkan, OpenGL, etc) created on devices under an SLI group may not be supported, and thus this API will return CUDA_ERROR_NOT_SUPPORTED. There is no guarantee that the contents of `handle` will be the same CUDA memory handle for the same given OS shareable handle, or the same underlying allocation. """ - cdef CUmemGenericAllocationHandle handle = CUmemGenericAllocationHandle() + cdef cydriver.CUmemAllocationHandleType cyshHandleType cdef _HelperInputVoidPtrStruct cyosHandleHelper - cdef void* cyosHandle = _helper_input_void_ptr(osHandle, &cyosHandleHelper) - cdef cydriver.CUmemAllocationHandleType cyshHandleType = int(shHandleType) - with nogil: - err = cydriver.cuMemImportFromShareableHandle(handle._pvt_ptr, cyosHandle, cyshHandleType) - _helper_input_void_ptr_free(&cyosHandleHelper) + cdef void* cyosHandle + cdef CUmemGenericAllocationHandle handle + try: + handle = CUmemGenericAllocationHandle() + cyosHandle = _helper_input_void_ptr(osHandle, &cyosHandleHelper) + cyshHandleType = int(shHandleType) + with nogil: + err = cydriver.cuMemImportFromShareableHandle(handle._pvt_ptr, cyosHandle, cyshHandleType) + finally: + _helper_input_void_ptr_free(&cyosHandleHelper) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, handle) @@ -35809,9 +36009,11 @@ def cuMemGetAllocationGranularity(prop : Optional[CUmemAllocationProp], option n -------- :py:obj:`~.cuMemCreate`, :py:obj:`~.cuMemMap` """ + cdef cydriver.CUmemAllocationGranularity_flags cyoption + cdef cydriver.CUmemAllocationProp* cyprop_ptr cdef size_t granularity = 0 - cdef cydriver.CUmemAllocationProp* cyprop_ptr = prop._pvt_ptr if prop is not None else NULL - cdef cydriver.CUmemAllocationGranularity_flags cyoption = int(option) + cyprop_ptr = prop._pvt_ptr if prop is not None else NULL + cyoption = int(option) with nogil: err = cydriver.cuMemGetAllocationGranularity(&granularity, cyprop_ptr, cyoption) if err != cydriver.CUDA_SUCCESS: @@ -35843,6 +36045,8 @@ def cuMemGetAllocationPropertiesFromHandle(handle): :py:obj:`~.cuMemCreate`, :py:obj:`~.cuMemImportFromShareableHandle` """ cdef cydriver.CUmemGenericAllocationHandle cyhandle + cdef CUmemAllocationProp prop + prop = CUmemAllocationProp() if handle is None: phandle = 0 elif isinstance(handle, (CUmemGenericAllocationHandle,)): @@ -35850,7 +36054,6 @@ def cuMemGetAllocationPropertiesFromHandle(handle): else: phandle = int(CUmemGenericAllocationHandle(handle)) cyhandle = phandle - cdef CUmemAllocationProp prop = CUmemAllocationProp() with nogil: err = cydriver.cuMemGetAllocationPropertiesFromHandle(prop._pvt_ptr, cyhandle) if err != cydriver.CUDA_SUCCESS: @@ -35889,12 +36092,16 @@ def cuMemRetainAllocationHandle(addr): ----- The address `addr`, can be any address in a range previously mapped by :py:obj:`~.cuMemMap`, and not necessarily the start address. """ - cdef CUmemGenericAllocationHandle handle = CUmemGenericAllocationHandle() cdef _HelperInputVoidPtrStruct cyaddrHelper - cdef void* cyaddr = _helper_input_void_ptr(addr, &cyaddrHelper) - with nogil: - err = cydriver.cuMemRetainAllocationHandle(handle._pvt_ptr, cyaddr) - _helper_input_void_ptr_free(&cyaddrHelper) + cdef void* cyaddr + cdef CUmemGenericAllocationHandle handle + try: + handle = CUmemGenericAllocationHandle() + cyaddr = _helper_input_void_ptr(addr, &cyaddrHelper) + with nogil: + err = cydriver.cuMemRetainAllocationHandle(handle._pvt_ptr, cyaddr) + finally: + _helper_input_void_ptr_free(&cyaddrHelper) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, handle) @@ -35928,13 +36135,6 @@ def cuMemFreeAsync(dptr, hStream): During stream capture, this function results in the creation of a free node and must therefore be passed the address of a graph allocation. """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream cdef cydriver.CUdeviceptr cydptr if dptr is None: pdptr = 0 @@ -35943,6 +36143,13 @@ def cuMemFreeAsync(dptr, hStream): else: pdptr = int(CUdeviceptr(dptr)) cydptr = pdptr + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream with nogil: err = cydriver.cuMemFreeAsync(cydptr, cyhStream) return (_CUresult(err),) @@ -35987,6 +36194,8 @@ def cuMemAllocAsync(size_t bytesize, hStream): During stream capture, this function results in the creation of an allocation node. In this case, the allocation is owned by the graph instead of the memory pool. The memory pool's properties are used to set the node's creation parameters. """ cdef cydriver.CUstream cyhStream + cdef CUdeviceptr dptr + dptr = CUdeviceptr() if hStream is None: phStream = 0 elif isinstance(hStream, (CUstream,)): @@ -35994,7 +36203,6 @@ def cuMemAllocAsync(size_t bytesize, hStream): else: phStream = int(CUstream(hStream)) cyhStream = phStream - cdef CUdeviceptr dptr = CUdeviceptr() with nogil: err = cydriver.cuMemAllocAsync(dptr._pvt_ptr, bytesize, cyhStream) if err != cydriver.CUDA_SUCCESS: @@ -36111,6 +36319,9 @@ def cuMemPoolSetAttribute(pool, attr not None : CUmemPool_attribute, value): -------- :py:obj:`~.cuMemAllocAsync`, :py:obj:`~.cuMemFreeAsync`, :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cuDeviceGetMemPool`, :py:obj:`~.cuMemPoolCreate` """ + cdef _HelperCUmemPool_attribute cyvalue + cdef void *cyvalueptr + cdef cydriver.CUmemPool_attribute cyattr cdef cydriver.CUmemoryPool cypool if pool is None: ppool = 0 @@ -36119,9 +36330,9 @@ def cuMemPoolSetAttribute(pool, attr not None : CUmemPool_attribute, value): else: ppool = int(CUmemoryPool(pool)) cypool = ppool - cdef cydriver.CUmemPool_attribute cyattr = int(attr) - cdef _HelperCUmemPool_attribute cyvalue = _HelperCUmemPool_attribute(attr, value, is_getter=False) - cdef void* cyvalue_ptr = cyvalue.cptr + cyattr = int(attr) + cyvalue = _HelperCUmemPool_attribute(attr, value, is_getter=False) + cyvalue_ptr = cyvalue.cptr with nogil: err = cydriver.cuMemPoolSetAttribute(cypool, cyattr, cyvalue_ptr) return (_CUresult(err),) @@ -36193,6 +36404,9 @@ def cuMemPoolGetAttribute(pool, attr not None : CUmemPool_attribute): -------- :py:obj:`~.cuMemAllocAsync`, :py:obj:`~.cuMemFreeAsync`, :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cuDeviceGetMemPool`, :py:obj:`~.cuMemPoolCreate` """ + cdef _HelperCUmemPool_attribute cyvalue + cdef void* cyvalue_ptr + cdef cydriver.CUmemPool_attribute cyattr cdef cydriver.CUmemoryPool cypool if pool is None: ppool = 0 @@ -36201,9 +36415,9 @@ def cuMemPoolGetAttribute(pool, attr not None : CUmemPool_attribute): else: ppool = int(CUmemoryPool(pool)) cypool = ppool - cdef cydriver.CUmemPool_attribute cyattr = int(attr) - cdef _HelperCUmemPool_attribute cyvalue = _HelperCUmemPool_attribute(attr, 0, is_getter=True) - cdef void* cyvalue_ptr = cyvalue.cptr + cyattr = int(attr) + cyvalue = _HelperCUmemPool_attribute(attr, 0, is_getter=True) + cyvalue_ptr = cyvalue.cptr with nogil: err = cydriver.cuMemPoolGetAttribute(cypool, cyattr, cyvalue_ptr) if err != cydriver.CUDA_SUCCESS: @@ -36236,31 +36450,33 @@ def cuMemPoolSetAccess(pool, map : Optional[tuple[CUmemAccessDesc] | list[CUmemA -------- :py:obj:`~.cuMemAllocAsync`, :py:obj:`~.cuMemFreeAsync`, :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cuDeviceGetMemPool`, :py:obj:`~.cuMemPoolCreate` """ - map = [] if map is None else map - if not all(isinstance(_x, (CUmemAccessDesc,)) for _x in map): - raise TypeError("Argument 'map' is not instance of type (expected tuple[cydriver.CUmemAccessDesc,] or list[cydriver.CUmemAccessDesc,]") - cdef cydriver.CUmemoryPool cypool - if pool is None: - ppool = 0 - elif isinstance(pool, (CUmemoryPool,)): - ppool = int(pool) - else: - ppool = int(CUmemoryPool(pool)) - cypool = ppool cdef cydriver.CUmemAccessDesc* cymap = NULL - if len(map) > 1: - cymap = calloc(len(map), sizeof(cydriver.CUmemAccessDesc)) - if cymap is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(map)) + 'x' + str(sizeof(cydriver.CUmemAccessDesc))) - for idx in range(len(map)): - string.memcpy(&cymap[idx], (map[idx])._pvt_ptr, sizeof(cydriver.CUmemAccessDesc)) - elif len(map) == 1: - cymap = (map[0])._pvt_ptr - if count > len(map): raise RuntimeError("List is too small: " + str(len(map)) + " < " + str(count)) - with nogil: - err = cydriver.cuMemPoolSetAccess(cypool, cymap, count) - if len(map) > 1 and cymap is not NULL: - free(cymap) + cdef cydriver.CUmemoryPool cypool + try: + if pool is None: + ppool = 0 + elif isinstance(pool, (CUmemoryPool,)): + ppool = int(pool) + else: + ppool = int(CUmemoryPool(pool)) + cypool = ppool + map = [] if map is None else map + if not all(isinstance(_x, (CUmemAccessDesc,)) for _x in map): + raise TypeError("Argument 'map' is not instance of type (expected tuple[cydriver.CUmemAccessDesc,] or list[cydriver.CUmemAccessDesc,]") + if len(map) > 1: + cymap = calloc(len(map), sizeof(cydriver.CUmemAccessDesc)) + if cymap is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(map)) + 'x' + str(sizeof(cydriver.CUmemAccessDesc))) + for idx in range(len(map)): + string.memcpy(&cymap[idx], (map[idx])._pvt_ptr, sizeof(cydriver.CUmemAccessDesc)) + elif len(map) == 1: + cymap = (map[0])._pvt_ptr + if count > len(map): raise RuntimeError("List is too small: " + str(len(map)) + " < " + str(count)) + with nogil: + err = cydriver.cuMemPoolSetAccess(cypool, cymap, count) + finally: + if len(map) > 1 and cymap is not NULL: + free(cymap) return (_CUresult(err),) {{endif}} @@ -36291,7 +36507,9 @@ def cuMemPoolGetAccess(memPool, location : Optional[CUmemLocation]): -------- :py:obj:`~.cuMemAllocAsync`, :py:obj:`~.cuMemFreeAsync`, :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cuDeviceGetMemPool`, :py:obj:`~.cuMemPoolCreate` """ + cdef cydriver.CUmemLocation* cylocation_ptr cdef cydriver.CUmemoryPool cymemPool + cdef cydriver.CUmemAccess_flags flags if memPool is None: pmemPool = 0 elif isinstance(memPool, (CUmemoryPool,)): @@ -36299,8 +36517,7 @@ def cuMemPoolGetAccess(memPool, location : Optional[CUmemLocation]): else: pmemPool = int(CUmemoryPool(memPool)) cymemPool = pmemPool - cdef cydriver.CUmemAccess_flags flags - cdef cydriver.CUmemLocation* cylocation_ptr = location._pvt_ptr if location is not None else NULL + cylocation_ptr = location._pvt_ptr if location is not None else NULL with nogil: err = cydriver.cuMemPoolGetAccess(&flags, cymemPool, cylocation_ptr) if err != cydriver.CUDA_SUCCESS: @@ -36398,8 +36615,10 @@ def cuMemPoolCreate(poolProps : Optional[CUmemPoolProps]): ----- Specifying CU_MEM_HANDLE_TYPE_NONE creates a memory pool that will not support IPC. """ - cdef CUmemoryPool pool = CUmemoryPool() - cdef cydriver.CUmemPoolProps* cypoolProps_ptr = poolProps._pvt_ptr if poolProps is not None else NULL + cdef cydriver.CUmemPoolProps* cypoolProps_ptr + cdef CUmemoryPool pool + pool = CUmemoryPool() + cypoolProps_ptr = poolProps._pvt_ptr if poolProps is not None else NULL with nogil: err = cydriver.cuMemPoolCreate(pool._pvt_ptr, cypoolProps_ptr) if err != cydriver.CUDA_SUCCESS: @@ -36488,9 +36707,12 @@ def cuMemGetDefaultMemPool(location : Optional[CUmemLocation], typename not None -------- :py:obj:`~.cuMemAllocAsync`, :py:obj:`~.cuMemPoolTrimTo`, :py:obj:`~.cuMemPoolGetAttribute`, :py:obj:`~.cuMemPoolSetAttribute`, :py:obj:`~.cuMemPoolSetAccess`, :py:obj:`~.cuMemGetMemPool`, :py:obj:`~.cuMemPoolCreate` """ - cdef CUmemoryPool pool_out = CUmemoryPool() - cdef cydriver.CUmemLocation* cylocation_ptr = location._pvt_ptr if location is not None else NULL - cdef cydriver.CUmemAllocationType cytypename = int(typename) + cdef cydriver.CUmemAllocationType cytypename + cdef cydriver.CUmemLocation* cylocation_ptr + cdef CUmemoryPool pool_out + pool_out = CUmemoryPool() + cylocation_ptr = location._pvt_ptr if location is not None else NULL + cytypename = int(typename) with nogil: err = cydriver.cuMemGetDefaultMemPool(pool_out._pvt_ptr, cylocation_ptr, cytypename) if err != cydriver.CUDA_SUCCESS: @@ -36541,9 +36763,12 @@ def cuMemGetMemPool(location : Optional[CUmemLocation], typename not None : CUme -------- :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cuMemPoolCreate`, :py:obj:`~.cuDeviceSetMemPool`, :py:obj:`~.cuMemSetMemPool` """ - cdef CUmemoryPool pool = CUmemoryPool() - cdef cydriver.CUmemLocation* cylocation_ptr = location._pvt_ptr if location is not None else NULL - cdef cydriver.CUmemAllocationType cytypename = int(typename) + cdef cydriver.CUmemAllocationType cytypename + cdef cydriver.CUmemLocation* cylocation_ptr + cdef CUmemoryPool pool + pool = CUmemoryPool() + cylocation_ptr = location._pvt_ptr if location is not None else NULL + cytypename = int(typename) with nogil: err = cydriver.cuMemGetMemPool(pool._pvt_ptr, cylocation_ptr, cytypename) if err != cydriver.CUDA_SUCCESS: @@ -36603,6 +36828,10 @@ def cuMemSetMemPool(location : Optional[CUmemLocation], typename not None : CUme Use :py:obj:`~.cuMemAllocFromPoolAsync` to specify asynchronous allocations from a device different than the one the stream runs on. """ cdef cydriver.CUmemoryPool cypool + cdef cydriver.CUmemAllocationType cytypename + cdef cydriver.CUmemLocation* cylocation_ptr + cylocation_ptr = location._pvt_ptr if location is not None else NULL + cytypename = int(typename) if pool is None: ppool = 0 elif isinstance(pool, (CUmemoryPool,)): @@ -36610,8 +36839,6 @@ def cuMemSetMemPool(location : Optional[CUmemLocation], typename not None : CUme else: ppool = int(CUmemoryPool(pool)) cypool = ppool - cdef cydriver.CUmemLocation* cylocation_ptr = location._pvt_ptr if location is not None else NULL - cdef cydriver.CUmemAllocationType cytypename = int(typename) with nogil: err = cydriver.cuMemSetMemPool(cylocation_ptr, cytypename, cypool) return (_CUresult(err),) @@ -36653,14 +36880,9 @@ def cuMemAllocFromPoolAsync(size_t bytesize, pool, hStream): During stream capture, this function results in the creation of an allocation node. In this case, the allocation is owned by the graph instead of the memory pool. The memory pool's properties are used to set the node's creation parameters. """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream cdef cydriver.CUmemoryPool cypool + cdef CUdeviceptr dptr + dptr = CUdeviceptr() if pool is None: ppool = 0 elif isinstance(pool, (CUmemoryPool,)): @@ -36668,7 +36890,13 @@ def cuMemAllocFromPoolAsync(size_t bytesize, pool, hStream): else: ppool = int(CUmemoryPool(pool)) cypool = ppool - cdef CUdeviceptr dptr = CUdeviceptr() + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream with nogil: err = cydriver.cuMemAllocFromPoolAsync(dptr._pvt_ptr, bytesize, cypool, cyhStream) if err != cydriver.CUDA_SUCCESS: @@ -36715,7 +36943,12 @@ def cuMemPoolExportToShareableHandle(pool, handleType not None : CUmemAllocation ----- : To create an IPC capable mempool, create a mempool with a CUmemAllocationHandleType other than CU_MEM_HANDLE_TYPE_NONE. """ + cdef cydriver.CUmemAllocationHandleType cyhandleType cdef cydriver.CUmemoryPool cypool + cdef _HelperCUmemAllocationHandleType cyhandle_out + cdef void* cyhandle_out_ptr + cyhandle_out = _HelperCUmemAllocationHandleType(handleType) + cyhandle_out_ptr = cyhandle_out.cptr if pool is None: ppool = 0 elif isinstance(pool, (CUmemoryPool,)): @@ -36723,9 +36956,7 @@ def cuMemPoolExportToShareableHandle(pool, handleType not None : CUmemAllocation else: ppool = int(CUmemoryPool(pool)) cypool = ppool - cdef _HelperCUmemAllocationHandleType cyhandle_out = _HelperCUmemAllocationHandleType(handleType) - cdef void* cyhandle_out_ptr = cyhandle_out.cptr - cdef cydriver.CUmemAllocationHandleType cyhandleType = int(handleType) + cyhandleType = int(handleType) with nogil: err = cydriver.cuMemPoolExportToShareableHandle(cyhandle_out_ptr, cypool, cyhandleType, flags) if err != cydriver.CUDA_SUCCESS: @@ -36771,13 +37002,18 @@ def cuMemPoolImportFromShareableHandle(handle, handleType not None : CUmemAlloca ----- Imported memory pools do not support creating new allocations. As such imported memory pools may not be used in cuDeviceSetMemPool or :py:obj:`~.cuMemAllocFromPoolAsync` calls. """ - cdef CUmemoryPool pool_out = CUmemoryPool() + cdef cydriver.CUmemAllocationHandleType cyhandleType cdef _HelperInputVoidPtrStruct cyhandleHelper - cdef void* cyhandle = _helper_input_void_ptr(handle, &cyhandleHelper) - cdef cydriver.CUmemAllocationHandleType cyhandleType = int(handleType) - with nogil: - err = cydriver.cuMemPoolImportFromShareableHandle(pool_out._pvt_ptr, cyhandle, cyhandleType, flags) - _helper_input_void_ptr_free(&cyhandleHelper) + cdef void* cyhandle + cdef CUmemoryPool pool_out + try: + pool_out = CUmemoryPool() + cyhandle = _helper_input_void_ptr(handle, &cyhandleHelper) + cyhandleType = int(handleType) + with nogil: + err = cydriver.cuMemPoolImportFromShareableHandle(pool_out._pvt_ptr, cyhandle, cyhandleType, flags) + finally: + _helper_input_void_ptr_free(&cyhandleHelper) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, pool_out) @@ -36811,6 +37047,8 @@ def cuMemPoolExportPointer(ptr): :py:obj:`~.cuMemPoolExportToShareableHandle`, :py:obj:`~.cuMemPoolImportFromShareableHandle`, :py:obj:`~.cuMemPoolImportPointer` """ cdef cydriver.CUdeviceptr cyptr + cdef CUmemPoolPtrExportData shareData_out + shareData_out = CUmemPoolPtrExportData() if ptr is None: pptr = 0 elif isinstance(ptr, (CUdeviceptr,)): @@ -36818,7 +37056,6 @@ def cuMemPoolExportPointer(ptr): else: pptr = int(CUdeviceptr(ptr)) cyptr = pptr - cdef CUmemPoolPtrExportData shareData_out = CUmemPoolPtrExportData() with nogil: err = cydriver.cuMemPoolExportPointer(shareData_out._pvt_ptr, cyptr) if err != cydriver.CUDA_SUCCESS: @@ -36862,7 +37099,10 @@ def cuMemPoolImportPointer(pool, shareData : Optional[CUmemPoolPtrExportData]): ----- The cuMemFreeAsync api may be used in the exporting process before the cuMemFreeAsync operation completes in its stream as long as the cuMemFreeAsync in the exporting process specifies a stream with a stream dependency on the importing process's cuMemFreeAsync. """ + cdef cydriver.CUmemPoolPtrExportData* cyshareData_ptr cdef cydriver.CUmemoryPool cypool + cdef CUdeviceptr ptr_out + ptr_out = CUdeviceptr() if pool is None: ppool = 0 elif isinstance(pool, (CUmemoryPool,)): @@ -36870,8 +37110,7 @@ def cuMemPoolImportPointer(pool, shareData : Optional[CUmemPoolPtrExportData]): else: ppool = int(CUmemoryPool(pool)) cypool = ppool - cdef CUdeviceptr ptr_out = CUdeviceptr() - cdef cydriver.CUmemPoolPtrExportData* cyshareData_ptr = shareData._pvt_ptr if shareData is not None else NULL + cyshareData_ptr = shareData._pvt_ptr if shareData is not None else NULL with nogil: err = cydriver.cuMemPoolImportPointer(ptr_out._pvt_ptr, cypool, cyshareData_ptr) if err != cydriver.CUDA_SUCCESS: @@ -36933,8 +37172,10 @@ def cuMulticastCreate(prop : Optional[CUmulticastObjectProp]): :py:obj:`~.cuMulticastBindAddr_v2`, :py:obj:`~.cuMulticastBindMem_v2` """ - cdef CUmemGenericAllocationHandle mcHandle = CUmemGenericAllocationHandle() - cdef cydriver.CUmulticastObjectProp* cyprop_ptr = prop._pvt_ptr if prop is not None else NULL + cdef cydriver.CUmulticastObjectProp* cyprop_ptr + cdef CUmemGenericAllocationHandle mcHandle + mcHandle = CUmemGenericAllocationHandle() + cyprop_ptr = prop._pvt_ptr if prop is not None else NULL with nogil: err = cydriver.cuMulticastCreate(mcHandle._pvt_ptr, cyprop_ptr) if err != cydriver.CUDA_SUCCESS: @@ -36979,13 +37220,6 @@ def cuMulticastAddDevice(mcHandle, dev): :py:obj:`~.cuMulticastCreate`, :py:obj:`~.cuMulticastBindMem`, :py:obj:`~.cuMulticastBindAddr` """ cdef cydriver.CUdevice cydev - if dev is None: - pdev = 0 - elif isinstance(dev, (CUdevice,)): - pdev = int(dev) - else: - pdev = int(CUdevice(dev)) - cydev = pdev cdef cydriver.CUmemGenericAllocationHandle cymcHandle if mcHandle is None: pmcHandle = 0 @@ -36994,6 +37228,13 @@ def cuMulticastAddDevice(mcHandle, dev): else: pmcHandle = int(CUmemGenericAllocationHandle(mcHandle)) cymcHandle = pmcHandle + if dev is None: + pdev = 0 + elif isinstance(dev, (CUdevice,)): + pdev = int(dev) + else: + pdev = int(CUdevice(dev)) + cydev = pdev with nogil: err = cydriver.cuMulticastAddDevice(cymcHandle, cydev) return (_CUresult(err),) @@ -37061,13 +37302,6 @@ def cuMulticastBindMem(mcHandle, size_t mcOffset, memHandle, size_t memOffset, s :py:obj:`~.cuMulticastBindMem_v2` """ cdef cydriver.CUmemGenericAllocationHandle cymemHandle - if memHandle is None: - pmemHandle = 0 - elif isinstance(memHandle, (CUmemGenericAllocationHandle,)): - pmemHandle = int(memHandle) - else: - pmemHandle = int(CUmemGenericAllocationHandle(memHandle)) - cymemHandle = pmemHandle cdef cydriver.CUmemGenericAllocationHandle cymcHandle if mcHandle is None: pmcHandle = 0 @@ -37076,6 +37310,13 @@ def cuMulticastBindMem(mcHandle, size_t mcOffset, memHandle, size_t memOffset, s else: pmcHandle = int(CUmemGenericAllocationHandle(mcHandle)) cymcHandle = pmcHandle + if memHandle is None: + pmemHandle = 0 + elif isinstance(memHandle, (CUmemGenericAllocationHandle,)): + pmemHandle = int(memHandle) + else: + pmemHandle = int(CUmemGenericAllocationHandle(memHandle)) + cymemHandle = pmemHandle with nogil: err = cydriver.cuMulticastBindMem(cymcHandle, mcOffset, cymemHandle, memOffset, size, flags) return (_CUresult(err),) @@ -37154,21 +37395,7 @@ def cuMulticastBindMem_v2(mcHandle, dev, size_t mcOffset, memHandle, size_t memO :py:obj:`~.cuMulticastCreate`, :py:obj:`~.cuMulticastAddDevice`, :py:obj:`~.cuMemCreate` """ cdef cydriver.CUmemGenericAllocationHandle cymemHandle - if memHandle is None: - pmemHandle = 0 - elif isinstance(memHandle, (CUmemGenericAllocationHandle,)): - pmemHandle = int(memHandle) - else: - pmemHandle = int(CUmemGenericAllocationHandle(memHandle)) - cymemHandle = pmemHandle cdef cydriver.CUdevice cydev - if dev is None: - pdev = 0 - elif isinstance(dev, (CUdevice,)): - pdev = int(dev) - else: - pdev = int(CUdevice(dev)) - cydev = pdev cdef cydriver.CUmemGenericAllocationHandle cymcHandle if mcHandle is None: pmcHandle = 0 @@ -37177,6 +37404,20 @@ def cuMulticastBindMem_v2(mcHandle, dev, size_t mcOffset, memHandle, size_t memO else: pmcHandle = int(CUmemGenericAllocationHandle(mcHandle)) cymcHandle = pmcHandle + if dev is None: + pdev = 0 + elif isinstance(dev, (CUdevice,)): + pdev = int(dev) + else: + pdev = int(CUdevice(dev)) + cydev = pdev + if memHandle is None: + pmemHandle = 0 + elif isinstance(memHandle, (CUmemGenericAllocationHandle,)): + pmemHandle = int(memHandle) + else: + pmemHandle = int(CUmemGenericAllocationHandle(memHandle)) + cymemHandle = pmemHandle with nogil: err = cydriver.cuMulticastBindMem_v2(cymcHandle, cydev, mcOffset, cymemHandle, memOffset, size, flags) return (_CUresult(err),) @@ -37240,13 +37481,6 @@ def cuMulticastBindAddr(mcHandle, size_t mcOffset, memptr, size_t size, unsigned :py:obj:`~.cuMulticastBindAddr_v2` """ cdef cydriver.CUdeviceptr cymemptr - if memptr is None: - pmemptr = 0 - elif isinstance(memptr, (CUdeviceptr,)): - pmemptr = int(memptr) - else: - pmemptr = int(CUdeviceptr(memptr)) - cymemptr = pmemptr cdef cydriver.CUmemGenericAllocationHandle cymcHandle if mcHandle is None: pmcHandle = 0 @@ -37255,6 +37489,13 @@ def cuMulticastBindAddr(mcHandle, size_t mcOffset, memptr, size_t size, unsigned else: pmcHandle = int(CUmemGenericAllocationHandle(mcHandle)) cymcHandle = pmcHandle + if memptr is None: + pmemptr = 0 + elif isinstance(memptr, (CUdeviceptr,)): + pmemptr = int(memptr) + else: + pmemptr = int(CUdeviceptr(memptr)) + cymemptr = pmemptr with nogil: err = cydriver.cuMulticastBindAddr(cymcHandle, mcOffset, cymemptr, size, flags) return (_CUresult(err),) @@ -37327,21 +37568,7 @@ def cuMulticastBindAddr_v2(mcHandle, dev, size_t mcOffset, memptr, size_t size, :py:obj:`~.cuMulticastCreate`, :py:obj:`~.cuMulticastAddDevice`, :py:obj:`~.cuMemCreate` """ cdef cydriver.CUdeviceptr cymemptr - if memptr is None: - pmemptr = 0 - elif isinstance(memptr, (CUdeviceptr,)): - pmemptr = int(memptr) - else: - pmemptr = int(CUdeviceptr(memptr)) - cymemptr = pmemptr cdef cydriver.CUdevice cydev - if dev is None: - pdev = 0 - elif isinstance(dev, (CUdevice,)): - pdev = int(dev) - else: - pdev = int(CUdevice(dev)) - cydev = pdev cdef cydriver.CUmemGenericAllocationHandle cymcHandle if mcHandle is None: pmcHandle = 0 @@ -37350,6 +37577,20 @@ def cuMulticastBindAddr_v2(mcHandle, dev, size_t mcOffset, memptr, size_t size, else: pmcHandle = int(CUmemGenericAllocationHandle(mcHandle)) cymcHandle = pmcHandle + if dev is None: + pdev = 0 + elif isinstance(dev, (CUdevice,)): + pdev = int(dev) + else: + pdev = int(CUdevice(dev)) + cydev = pdev + if memptr is None: + pmemptr = 0 + elif isinstance(memptr, (CUdeviceptr,)): + pmemptr = int(memptr) + else: + pmemptr = int(CUdeviceptr(memptr)) + cymemptr = pmemptr with nogil: err = cydriver.cuMulticastBindAddr_v2(cymcHandle, cydev, mcOffset, cymemptr, size, flags) return (_CUresult(err),) @@ -37396,13 +37637,6 @@ def cuMulticastUnbind(mcHandle, dev, size_t mcOffset, size_t size): Warning: The `mcOffset` and the `size` must match the corresponding values specified during the bind call. Any other values may result in undefined behavior. """ cdef cydriver.CUdevice cydev - if dev is None: - pdev = 0 - elif isinstance(dev, (CUdevice,)): - pdev = int(dev) - else: - pdev = int(CUdevice(dev)) - cydev = pdev cdef cydriver.CUmemGenericAllocationHandle cymcHandle if mcHandle is None: pmcHandle = 0 @@ -37411,6 +37645,13 @@ def cuMulticastUnbind(mcHandle, dev, size_t mcOffset, size_t size): else: pmcHandle = int(CUmemGenericAllocationHandle(mcHandle)) cymcHandle = pmcHandle + if dev is None: + pdev = 0 + elif isinstance(dev, (CUdevice,)): + pdev = int(dev) + else: + pdev = int(CUdevice(dev)) + cydev = pdev with nogil: err = cydriver.cuMulticastUnbind(cymcHandle, cydev, mcOffset, size) return (_CUresult(err),) @@ -37447,9 +37688,11 @@ def cuMulticastGetGranularity(prop : Optional[CUmulticastObjectProp], option not :py:obj:`~.cuMulticastBindMem_v2`, :py:obj:`~.cuMulticastBindAddr_v2` """ + cdef cydriver.CUmulticastGranularity_flags cyoption + cdef cydriver.CUmulticastObjectProp* cyprop_ptr cdef size_t granularity = 0 - cdef cydriver.CUmulticastObjectProp* cyprop_ptr = prop._pvt_ptr if prop is not None else NULL - cdef cydriver.CUmulticastGranularity_flags cyoption = int(option) + cyprop_ptr = prop._pvt_ptr if prop is not None else NULL + cyoption = int(option) with nogil: err = cydriver.cuMulticastGetGranularity(&granularity, cyprop_ptr, cyoption) if err != cydriver.CUDA_SUCCESS: @@ -37653,6 +37896,12 @@ def cuPointerGetAttribute(attribute not None : CUpointer_attribute, ptr): :py:obj:`~.cuPointerSetAttribute`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostRegister`, :py:obj:`~.cuMemHostUnregister`, :py:obj:`~.cudaPointerGetAttributes` """ cdef cydriver.CUdeviceptr cyptr + cdef cydriver.CUpointer_attribute cyattribute + cdef _HelperCUpointer_attribute cydata + cdef void* cydata_ptr + cydata = _HelperCUpointer_attribute(attribute, 0, is_getter=True) + cydata_ptr = cydata.cptr + cyattribute = int(attribute) if ptr is None: pptr = 0 elif isinstance(ptr, (CUdeviceptr,)): @@ -37660,9 +37909,6 @@ def cuPointerGetAttribute(attribute not None : CUpointer_attribute, ptr): else: pptr = int(CUdeviceptr(ptr)) cyptr = pptr - cdef _HelperCUpointer_attribute cydata = _HelperCUpointer_attribute(attribute, 0, is_getter=True) - cdef void* cydata_ptr = cydata.cptr - cdef cydriver.CUpointer_attribute cyattribute = int(attribute) with nogil: err = cydriver.cuPointerGetAttribute(cydata_ptr, cyattribute, cyptr) if err != cydriver.CUDA_SUCCESS: @@ -37773,13 +38019,6 @@ def cuMemPrefetchAsync(devPtr, size_t count, location not None : CUmemLocation, :py:obj:`~.cuMemcpy`, :py:obj:`~.cuMemcpyPeer`, :py:obj:`~.cuMemcpyAsync`, :py:obj:`~.cuMemcpy3DPeerAsync`, :py:obj:`~.cuMemAdvise`, :py:obj:`~.cudaMemPrefetchAsync` """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream cdef cydriver.CUdeviceptr cydevPtr if devPtr is None: pdevPtr = 0 @@ -37788,6 +38027,13 @@ def cuMemPrefetchAsync(devPtr, size_t count, location not None : CUmemLocation, else: pdevPtr = int(CUdeviceptr(devPtr)) cydevPtr = pdevPtr + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream with nogil: err = cydriver.cuMemPrefetchAsync(cydevPtr, count, location._pvt_ptr[0], flags, cyhStream) return (_CUresult(err),) @@ -37984,6 +38230,7 @@ def cuMemAdvise(devPtr, size_t count, advice not None : CUmem_advise, location n -------- :py:obj:`~.cuMemcpy`, :py:obj:`~.cuMemcpyPeer`, :py:obj:`~.cuMemcpyAsync`, :py:obj:`~.cuMemcpy3DPeerAsync`, :py:obj:`~.cuMemPrefetchAsync`, :py:obj:`~.cudaMemAdvise` """ + cdef cydriver.CUmem_advise cyadvice cdef cydriver.CUdeviceptr cydevPtr if devPtr is None: pdevPtr = 0 @@ -37992,7 +38239,7 @@ def cuMemAdvise(devPtr, size_t count, advice not None : CUmem_advise, location n else: pdevPtr = int(CUdeviceptr(devPtr)) cydevPtr = pdevPtr - cdef cydriver.CUmem_advise cyadvice = int(advice) + cyadvice = int(advice) with nogil: err = cydriver.cuMemAdvise(cydevPtr, count, cyadvice, location._pvt_ptr[0]) return (_CUresult(err),) @@ -38067,54 +38314,58 @@ def cuMemPrefetchBatchAsync(dptrs : Optional[tuple[CUdeviceptr] | list[CUdevicep """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream - if not all(isinstance(_x, (int)) for _x in prefetchLocIdxs): - raise TypeError("Argument 'prefetchLocIdxs' is not instance of type (expected tuple[int] or list[int]") - prefetchLocs = [] if prefetchLocs is None else prefetchLocs - if not all(isinstance(_x, (CUmemLocation,)) for _x in prefetchLocs): - raise TypeError("Argument 'prefetchLocs' is not instance of type (expected tuple[cydriver.CUmemLocation,] or list[cydriver.CUmemLocation,]") - if not all(isinstance(_x, (int)) for _x in sizes): - raise TypeError("Argument 'sizes' is not instance of type (expected tuple[int] or list[int]") - dptrs = [] if dptrs is None else dptrs - if not all(isinstance(_x, (CUdeviceptr,)) for _x in dptrs): - raise TypeError("Argument 'dptrs' is not instance of type (expected tuple[cydriver.CUdeviceptr,] or list[cydriver.CUdeviceptr,]") - cdef cydriver.CUdeviceptr* cydptrs = NULL - if len(dptrs) > 1: - cydptrs = calloc(len(dptrs), sizeof(cydriver.CUdeviceptr)) - if cydptrs is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dptrs)) + 'x' + str(sizeof(cydriver.CUdeviceptr))) - else: - for idx in range(len(dptrs)): - cydptrs[idx] = (dptrs[idx])._pvt_ptr[0] - elif len(dptrs) == 1: - cydptrs = (dptrs[0])._pvt_ptr - cdef vector[size_t] cysizes = sizes - if count > len(dptrs): raise RuntimeError("List is too small: " + str(len(dptrs)) + " < " + str(count)) - if count > len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count)) + cdef vector[size_t] cyprefetchLocIdxs cdef cydriver.CUmemLocation* cyprefetchLocs = NULL - if len(prefetchLocs) > 1: - cyprefetchLocs = calloc(len(prefetchLocs), sizeof(cydriver.CUmemLocation)) - if cyprefetchLocs is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(prefetchLocs)) + 'x' + str(sizeof(cydriver.CUmemLocation))) - for idx in range(len(prefetchLocs)): - string.memcpy(&cyprefetchLocs[idx], (prefetchLocs[idx])._pvt_ptr, sizeof(cydriver.CUmemLocation)) - elif len(prefetchLocs) == 1: - cyprefetchLocs = (prefetchLocs[0])._pvt_ptr - cdef vector[size_t] cyprefetchLocIdxs = prefetchLocIdxs - if numPrefetchLocs > len(prefetchLocs): raise RuntimeError("List is too small: " + str(len(prefetchLocs)) + " < " + str(numPrefetchLocs)) - if numPrefetchLocs > len(prefetchLocIdxs): raise RuntimeError("List is too small: " + str(len(prefetchLocIdxs)) + " < " + str(numPrefetchLocs)) - with nogil: - err = cydriver.cuMemPrefetchBatchAsync(cydptrs, cysizes.data(), count, cyprefetchLocs, cyprefetchLocIdxs.data(), numPrefetchLocs, flags, cyhStream) - if len(dptrs) > 1 and cydptrs is not NULL: - free(cydptrs) - if len(prefetchLocs) > 1 and cyprefetchLocs is not NULL: - free(cyprefetchLocs) + cdef vector[size_t] cysizes + cdef cydriver.CUdeviceptr* cydptrs = NULL + try: + dptrs = [] if dptrs is None else dptrs + if not all(isinstance(_x, (CUdeviceptr,)) for _x in dptrs): + raise TypeError("Argument 'dptrs' is not instance of type (expected tuple[cydriver.CUdeviceptr,] or list[cydriver.CUdeviceptr,]") + if len(dptrs) > 1: + cydptrs = calloc(len(dptrs), sizeof(cydriver.CUdeviceptr)) + if cydptrs is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dptrs)) + 'x' + str(sizeof(cydriver.CUdeviceptr))) + else: + for idx in range(len(dptrs)): + cydptrs[idx] = (dptrs[idx])._pvt_ptr[0] + elif len(dptrs) == 1: + cydptrs = (dptrs[0])._pvt_ptr + if not all(isinstance(_x, (int)) for _x in sizes): + raise TypeError("Argument 'sizes' is not instance of type (expected tuple[int] or list[int]") + cysizes = sizes + if count > len(dptrs): raise RuntimeError("List is too small: " + str(len(dptrs)) + " < " + str(count)) + if count > len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count)) + prefetchLocs = [] if prefetchLocs is None else prefetchLocs + if not all(isinstance(_x, (CUmemLocation,)) for _x in prefetchLocs): + raise TypeError("Argument 'prefetchLocs' is not instance of type (expected tuple[cydriver.CUmemLocation,] or list[cydriver.CUmemLocation,]") + if len(prefetchLocs) > 1: + cyprefetchLocs = calloc(len(prefetchLocs), sizeof(cydriver.CUmemLocation)) + if cyprefetchLocs is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(prefetchLocs)) + 'x' + str(sizeof(cydriver.CUmemLocation))) + for idx in range(len(prefetchLocs)): + string.memcpy(&cyprefetchLocs[idx], (prefetchLocs[idx])._pvt_ptr, sizeof(cydriver.CUmemLocation)) + elif len(prefetchLocs) == 1: + cyprefetchLocs = (prefetchLocs[0])._pvt_ptr + if not all(isinstance(_x, (int)) for _x in prefetchLocIdxs): + raise TypeError("Argument 'prefetchLocIdxs' is not instance of type (expected tuple[int] or list[int]") + cyprefetchLocIdxs = prefetchLocIdxs + if numPrefetchLocs > len(prefetchLocs): raise RuntimeError("List is too small: " + str(len(prefetchLocs)) + " < " + str(numPrefetchLocs)) + if numPrefetchLocs > len(prefetchLocIdxs): raise RuntimeError("List is too small: " + str(len(prefetchLocIdxs)) + " < " + str(numPrefetchLocs)) + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream + with nogil: + err = cydriver.cuMemPrefetchBatchAsync(cydptrs, cysizes.data(), count, cyprefetchLocs, cyprefetchLocIdxs.data(), numPrefetchLocs, flags, cyhStream) + finally: + if len(dptrs) > 1 and cydptrs is not NULL: + free(cydptrs) + if len(prefetchLocs) > 1 and cyprefetchLocs is not NULL: + free(cyprefetchLocs) return (_CUresult(err),) {{endif}} @@ -38171,35 +38422,38 @@ def cuMemDiscardBatchAsync(dptrs : Optional[tuple[CUdeviceptr] | list[CUdevicept """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream - if not all(isinstance(_x, (int)) for _x in sizes): - raise TypeError("Argument 'sizes' is not instance of type (expected tuple[int] or list[int]") - dptrs = [] if dptrs is None else dptrs - if not all(isinstance(_x, (CUdeviceptr,)) for _x in dptrs): - raise TypeError("Argument 'dptrs' is not instance of type (expected tuple[cydriver.CUdeviceptr,] or list[cydriver.CUdeviceptr,]") + cdef vector[size_t] cysizes cdef cydriver.CUdeviceptr* cydptrs = NULL - if len(dptrs) > 1: - cydptrs = calloc(len(dptrs), sizeof(cydriver.CUdeviceptr)) - if cydptrs is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dptrs)) + 'x' + str(sizeof(cydriver.CUdeviceptr))) - else: - for idx in range(len(dptrs)): - cydptrs[idx] = (dptrs[idx])._pvt_ptr[0] - elif len(dptrs) == 1: - cydptrs = (dptrs[0])._pvt_ptr - cdef vector[size_t] cysizes = sizes - if count > len(dptrs): raise RuntimeError("List is too small: " + str(len(dptrs)) + " < " + str(count)) - if count > len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count)) - with nogil: - err = cydriver.cuMemDiscardBatchAsync(cydptrs, cysizes.data(), count, flags, cyhStream) - if len(dptrs) > 1 and cydptrs is not NULL: - free(cydptrs) + try: + dptrs = [] if dptrs is None else dptrs + if not all(isinstance(_x, (CUdeviceptr,)) for _x in dptrs): + raise TypeError("Argument 'dptrs' is not instance of type (expected tuple[cydriver.CUdeviceptr,] or list[cydriver.CUdeviceptr,]") + if len(dptrs) > 1: + cydptrs = calloc(len(dptrs), sizeof(cydriver.CUdeviceptr)) + if cydptrs is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dptrs)) + 'x' + str(sizeof(cydriver.CUdeviceptr))) + else: + for idx in range(len(dptrs)): + cydptrs[idx] = (dptrs[idx])._pvt_ptr[0] + elif len(dptrs) == 1: + cydptrs = (dptrs[0])._pvt_ptr + if not all(isinstance(_x, (int)) for _x in sizes): + raise TypeError("Argument 'sizes' is not instance of type (expected tuple[int] or list[int]") + cysizes = sizes + if count > len(dptrs): raise RuntimeError("List is too small: " + str(len(dptrs)) + " < " + str(count)) + if count > len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count)) + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream + with nogil: + err = cydriver.cuMemDiscardBatchAsync(cydptrs, cysizes.data(), count, flags, cyhStream) + finally: + if len(dptrs) > 1 and cydptrs is not NULL: + free(cydptrs) return (_CUresult(err),) {{endif}} @@ -38280,54 +38534,58 @@ def cuMemDiscardAndPrefetchBatchAsync(dptrs : Optional[tuple[CUdeviceptr] | list """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream - if not all(isinstance(_x, (int)) for _x in prefetchLocIdxs): - raise TypeError("Argument 'prefetchLocIdxs' is not instance of type (expected tuple[int] or list[int]") - prefetchLocs = [] if prefetchLocs is None else prefetchLocs - if not all(isinstance(_x, (CUmemLocation,)) for _x in prefetchLocs): - raise TypeError("Argument 'prefetchLocs' is not instance of type (expected tuple[cydriver.CUmemLocation,] or list[cydriver.CUmemLocation,]") - if not all(isinstance(_x, (int)) for _x in sizes): - raise TypeError("Argument 'sizes' is not instance of type (expected tuple[int] or list[int]") - dptrs = [] if dptrs is None else dptrs - if not all(isinstance(_x, (CUdeviceptr,)) for _x in dptrs): - raise TypeError("Argument 'dptrs' is not instance of type (expected tuple[cydriver.CUdeviceptr,] or list[cydriver.CUdeviceptr,]") - cdef cydriver.CUdeviceptr* cydptrs = NULL - if len(dptrs) > 1: - cydptrs = calloc(len(dptrs), sizeof(cydriver.CUdeviceptr)) - if cydptrs is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dptrs)) + 'x' + str(sizeof(cydriver.CUdeviceptr))) - else: - for idx in range(len(dptrs)): - cydptrs[idx] = (dptrs[idx])._pvt_ptr[0] - elif len(dptrs) == 1: - cydptrs = (dptrs[0])._pvt_ptr - cdef vector[size_t] cysizes = sizes - if count > len(dptrs): raise RuntimeError("List is too small: " + str(len(dptrs)) + " < " + str(count)) - if count > len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count)) + cdef vector[size_t] cyprefetchLocIdxs cdef cydriver.CUmemLocation* cyprefetchLocs = NULL - if len(prefetchLocs) > 1: - cyprefetchLocs = calloc(len(prefetchLocs), sizeof(cydriver.CUmemLocation)) - if cyprefetchLocs is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(prefetchLocs)) + 'x' + str(sizeof(cydriver.CUmemLocation))) - for idx in range(len(prefetchLocs)): - string.memcpy(&cyprefetchLocs[idx], (prefetchLocs[idx])._pvt_ptr, sizeof(cydriver.CUmemLocation)) - elif len(prefetchLocs) == 1: - cyprefetchLocs = (prefetchLocs[0])._pvt_ptr - cdef vector[size_t] cyprefetchLocIdxs = prefetchLocIdxs - if numPrefetchLocs > len(prefetchLocs): raise RuntimeError("List is too small: " + str(len(prefetchLocs)) + " < " + str(numPrefetchLocs)) - if numPrefetchLocs > len(prefetchLocIdxs): raise RuntimeError("List is too small: " + str(len(prefetchLocIdxs)) + " < " + str(numPrefetchLocs)) - with nogil: - err = cydriver.cuMemDiscardAndPrefetchBatchAsync(cydptrs, cysizes.data(), count, cyprefetchLocs, cyprefetchLocIdxs.data(), numPrefetchLocs, flags, cyhStream) - if len(dptrs) > 1 and cydptrs is not NULL: - free(cydptrs) - if len(prefetchLocs) > 1 and cyprefetchLocs is not NULL: - free(cyprefetchLocs) + cdef vector[size_t] cysizes + cdef cydriver.CUdeviceptr* cydptrs = NULL + try: + dptrs = [] if dptrs is None else dptrs + if not all(isinstance(_x, (CUdeviceptr,)) for _x in dptrs): + raise TypeError("Argument 'dptrs' is not instance of type (expected tuple[cydriver.CUdeviceptr,] or list[cydriver.CUdeviceptr,]") + if len(dptrs) > 1: + cydptrs = calloc(len(dptrs), sizeof(cydriver.CUdeviceptr)) + if cydptrs is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dptrs)) + 'x' + str(sizeof(cydriver.CUdeviceptr))) + else: + for idx in range(len(dptrs)): + cydptrs[idx] = (dptrs[idx])._pvt_ptr[0] + elif len(dptrs) == 1: + cydptrs = (dptrs[0])._pvt_ptr + if not all(isinstance(_x, (int)) for _x in sizes): + raise TypeError("Argument 'sizes' is not instance of type (expected tuple[int] or list[int]") + cysizes = sizes + if count > len(dptrs): raise RuntimeError("List is too small: " + str(len(dptrs)) + " < " + str(count)) + if count > len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count)) + prefetchLocs = [] if prefetchLocs is None else prefetchLocs + if not all(isinstance(_x, (CUmemLocation,)) for _x in prefetchLocs): + raise TypeError("Argument 'prefetchLocs' is not instance of type (expected tuple[cydriver.CUmemLocation,] or list[cydriver.CUmemLocation,]") + if len(prefetchLocs) > 1: + cyprefetchLocs = calloc(len(prefetchLocs), sizeof(cydriver.CUmemLocation)) + if cyprefetchLocs is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(prefetchLocs)) + 'x' + str(sizeof(cydriver.CUmemLocation))) + for idx in range(len(prefetchLocs)): + string.memcpy(&cyprefetchLocs[idx], (prefetchLocs[idx])._pvt_ptr, sizeof(cydriver.CUmemLocation)) + elif len(prefetchLocs) == 1: + cyprefetchLocs = (prefetchLocs[0])._pvt_ptr + if not all(isinstance(_x, (int)) for _x in prefetchLocIdxs): + raise TypeError("Argument 'prefetchLocIdxs' is not instance of type (expected tuple[int] or list[int]") + cyprefetchLocIdxs = prefetchLocIdxs + if numPrefetchLocs > len(prefetchLocs): raise RuntimeError("List is too small: " + str(len(prefetchLocs)) + " < " + str(numPrefetchLocs)) + if numPrefetchLocs > len(prefetchLocIdxs): raise RuntimeError("List is too small: " + str(len(prefetchLocIdxs)) + " < " + str(numPrefetchLocs)) + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream + with nogil: + err = cydriver.cuMemDiscardAndPrefetchBatchAsync(cydptrs, cysizes.data(), count, cyprefetchLocs, cyprefetchLocIdxs.data(), numPrefetchLocs, flags, cyhStream) + finally: + if len(dptrs) > 1 and cydptrs is not NULL: + free(cydptrs) + if len(prefetchLocs) > 1 and cyprefetchLocs is not NULL: + free(cyprefetchLocs) return (_CUresult(err),) {{endif}} @@ -38472,6 +38730,12 @@ def cuMemRangeGetAttribute(size_t dataSize, attribute not None : CUmem_range_att :py:obj:`~.cuMemRangeGetAttributes`, :py:obj:`~.cuMemPrefetchAsync`, :py:obj:`~.cuMemAdvise`, :py:obj:`~.cudaMemRangeGetAttribute` """ cdef cydriver.CUdeviceptr cydevPtr + cdef cydriver.CUmem_range_attribute cyattribute + cdef _HelperCUmem_range_attribute cydata + cdef void* cydata_ptr + cydata = _HelperCUmem_range_attribute(attribute, dataSize) + cydata_ptr = cydata.cptr + cyattribute = int(attribute) if devPtr is None: pdevPtr = 0 elif isinstance(devPtr, (CUdeviceptr,)): @@ -38479,9 +38743,6 @@ def cuMemRangeGetAttribute(size_t dataSize, attribute not None : CUmem_range_att else: pdevPtr = int(CUdeviceptr(devPtr)) cydevPtr = pdevPtr - cdef _HelperCUmem_range_attribute cydata = _HelperCUmem_range_attribute(attribute, dataSize) - cdef void* cydata_ptr = cydata.cptr - cdef cydriver.CUmem_range_attribute cyattribute = int(attribute) with nogil: err = cydriver.cuMemRangeGetAttribute(cydata_ptr, dataSize, cyattribute, cydevPtr, count) if err != cydriver.CUDA_SUCCESS: @@ -38550,6 +38811,22 @@ def cuMemRangeGetAttributes(dataSizes : tuple[int] | list[int], attributes : Opt :py:obj:`~.cuMemRangeGetAttribute`, :py:obj:`~.cuMemAdvise`, :py:obj:`~.cuMemPrefetchAsync`, :py:obj:`~.cudaMemRangeGetAttributes` """ cdef cydriver.CUdeviceptr cydevPtr + cdef vector[cydriver.CUmem_range_attribute] cyattributes + cdef vector[size_t] cydataSizes + cdef _InputVoidPtrPtrHelper voidStarHelperdata + cdef void** cyvoidStarHelper_ptr + pylist = [_HelperCUmem_range_attribute(pyattributes, pydataSizes) for (pyattributes, pydataSizes) in zip(attributes, dataSizes)] + voidStarHelperdata = _InputVoidPtrPtrHelper(pylist) + cyvoidStarHelper_ptr = voidStarHelperdata.cptr + if not all(isinstance(_x, (int)) for _x in dataSizes): + raise TypeError("Argument 'dataSizes' is not instance of type (expected tuple[int] or list[int]") + cydataSizes = dataSizes + attributes = [] if attributes is None else attributes + if not all(isinstance(_x, (CUmem_range_attribute)) for _x in attributes): + raise TypeError("Argument 'attributes' is not instance of type (expected tuple[cydriver.CUmem_range_attribute] or list[cydriver.CUmem_range_attribute]") + cyattributes = attributes + if numAttributes > len(dataSizes): raise RuntimeError("List is too small: " + str(len(dataSizes)) + " < " + str(numAttributes)) + if numAttributes > len(attributes): raise RuntimeError("List is too small: " + str(len(attributes)) + " < " + str(numAttributes)) if devPtr is None: pdevPtr = 0 elif isinstance(devPtr, (CUdeviceptr,)): @@ -38557,18 +38834,6 @@ def cuMemRangeGetAttributes(dataSizes : tuple[int] | list[int], attributes : Opt else: pdevPtr = int(CUdeviceptr(devPtr)) cydevPtr = pdevPtr - attributes = [] if attributes is None else attributes - if not all(isinstance(_x, (CUmem_range_attribute)) for _x in attributes): - raise TypeError("Argument 'attributes' is not instance of type (expected tuple[cydriver.CUmem_range_attribute] or list[cydriver.CUmem_range_attribute]") - if not all(isinstance(_x, (int)) for _x in dataSizes): - raise TypeError("Argument 'dataSizes' is not instance of type (expected tuple[int] or list[int]") - pylist = [_HelperCUmem_range_attribute(pyattributes, pydataSizes) for (pyattributes, pydataSizes) in zip(attributes, dataSizes)] - cdef _InputVoidPtrPtrHelper voidStarHelperdata = _InputVoidPtrPtrHelper(pylist) - cdef void** cyvoidStarHelper_ptr = voidStarHelperdata.cptr - cdef vector[size_t] cydataSizes = dataSizes - cdef vector[cydriver.CUmem_range_attribute] cyattributes = [int(pyattributes) for pyattributes in (attributes)] - if numAttributes > len(dataSizes): raise RuntimeError("List is too small: " + str(len(dataSizes)) + " < " + str(numAttributes)) - if numAttributes > len(attributes): raise RuntimeError("List is too small: " + str(len(attributes)) + " < " + str(numAttributes)) with nogil: err = cydriver.cuMemRangeGetAttributes(cyvoidStarHelper_ptr, cydataSizes.data(), cyattributes.data(), numAttributes, cydevPtr, count) if err != cydriver.CUDA_SUCCESS: @@ -38617,6 +38882,12 @@ def cuPointerSetAttribute(value, attribute not None : CUpointer_attribute, ptr): :py:obj:`~.cuPointerGetAttribute`, :py:obj:`~.cuPointerGetAttributes`, :py:obj:`~.cuMemAlloc`, :py:obj:`~.cuMemFree`, :py:obj:`~.cuMemAllocHost`, :py:obj:`~.cuMemFreeHost`, :py:obj:`~.cuMemHostAlloc`, :py:obj:`~.cuMemHostRegister`, :py:obj:`~.cuMemHostUnregister` """ cdef cydriver.CUdeviceptr cyptr + cdef cydriver.CUpointer_attribute cyattribute + cdef _HelperCUpointer_attribute cyvalue + cdef void* cyvalue_ptr + cyvalue = _HelperCUpointer_attribute(attribute, value, is_getter=False) + cyvalue_ptr = cyvalue.cptr + cyattribute = int(attribute) if ptr is None: pptr = 0 elif isinstance(ptr, (CUdeviceptr,)): @@ -38624,9 +38895,6 @@ def cuPointerSetAttribute(value, attribute not None : CUpointer_attribute, ptr): else: pptr = int(CUdeviceptr(ptr)) cyptr = pptr - cdef _HelperCUpointer_attribute cyvalue = _HelperCUpointer_attribute(attribute, value, is_getter=False) - cdef void* cyvalue_ptr = cyvalue.cptr - cdef cydriver.CUpointer_attribute cyattribute = int(attribute) with nogil: err = cydriver.cuPointerSetAttribute(cyvalue_ptr, cyattribute, cyptr) return (_CUresult(err),) @@ -38704,6 +38972,17 @@ def cuPointerGetAttributes(unsigned int numAttributes, attributes : Optional[tup :py:obj:`~.cuPointerGetAttribute`, :py:obj:`~.cuPointerSetAttribute`, :py:obj:`~.cudaPointerGetAttributes` """ cdef cydriver.CUdeviceptr cyptr + cdef _InputVoidPtrPtrHelper voidStarHelperdata + cdef void** cyvoidStarHelper_ptr + cdef vector[cydriver.CUpointer_attribute] cyattributes + if numAttributes > len(attributes): raise RuntimeError("List is too small: " + str(len(attributes)) + " < " + str(numAttributes)) + attributes = [] if attributes is None else attributes + if not all(isinstance(_x, (CUpointer_attribute)) for _x in attributes): + raise TypeError("Argument 'attributes' is not instance of type (expected tuple[cydriver.CUpointer_attribute] or list[cydriver.CUpointer_attribute]") + cyattributes = attributes + pylist = [_HelperCUpointer_attribute(pyattributes, 0, is_getter=True) for pyattributes in attributes] + voidStarHelperdata = _InputVoidPtrPtrHelper(pylist) + cyvoidStarHelper_ptr = voidStarHelperdata.cptr if ptr is None: pptr = 0 elif isinstance(ptr, (CUdeviceptr,)): @@ -38711,14 +38990,6 @@ def cuPointerGetAttributes(unsigned int numAttributes, attributes : Optional[tup else: pptr = int(CUdeviceptr(ptr)) cyptr = pptr - attributes = [] if attributes is None else attributes - if not all(isinstance(_x, (CUpointer_attribute)) for _x in attributes): - raise TypeError("Argument 'attributes' is not instance of type (expected tuple[cydriver.CUpointer_attribute] or list[cydriver.CUpointer_attribute]") - if numAttributes > len(attributes): raise RuntimeError("List is too small: " + str(len(attributes)) + " < " + str(numAttributes)) - cdef vector[cydriver.CUpointer_attribute] cyattributes = [int(pyattributes) for pyattributes in (attributes)] - pylist = [_HelperCUpointer_attribute(pyattributes, 0, is_getter=True) for pyattributes in attributes] - cdef _InputVoidPtrPtrHelper voidStarHelperdata = _InputVoidPtrPtrHelper(pylist) - cdef void** cyvoidStarHelper_ptr = voidStarHelperdata.cptr with nogil: err = cydriver.cuPointerGetAttributes(numAttributes, cyattributes.data(), cyvoidStarHelper_ptr, cyptr) if err != cydriver.CUDA_SUCCESS: @@ -38760,7 +39031,8 @@ def cuStreamCreate(unsigned int Flags): -------- :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreateWithPriority`, :py:obj:`~.cuGreenCtxStreamCreate`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cuStreamGetFlags`, :py:obj:`~.cuStreamGetDevice` :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags` """ - cdef CUstream phStream = CUstream() + cdef CUstream phStream + phStream = CUstream() with nogil: err = cydriver.cuStreamCreate(phStream._pvt_ptr, Flags) if err != cydriver.CUDA_SUCCESS: @@ -38815,7 +39087,8 @@ def cuStreamCreateWithPriority(unsigned int flags, int priority): In the current implementation, only compute kernels launched in priority streams are affected by the stream's priority. Stream priorities have no effect on host-to-device and device-to-host memory operations. """ - cdef CUstream phStream = CUstream() + cdef CUstream phStream + phStream = CUstream() with nogil: err = cydriver.cuStreamCreateWithPriority(phStream._pvt_ptr, flags, priority) if err != cydriver.CUDA_SUCCESS: @@ -38855,6 +39128,7 @@ def cuStreamGetPriority(hStream): -------- :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamCreateWithPriority`, :py:obj:`~.cuGreenCtxStreamCreate`, :py:obj:`~.cuCtxGetStreamPriorityRange`, :py:obj:`~.cuStreamGetFlags`, :py:obj:`~.cuStreamGetDevice`, :py:obj:`~.cudaStreamGetPriority` """ + cdef int priority = 0 cdef cydriver.CUstream cyhStream if hStream is None: phStream = 0 @@ -38863,7 +39137,6 @@ def cuStreamGetPriority(hStream): else: phStream = int(CUstream(hStream)) cyhStream = phStream - cdef int priority = 0 with nogil: err = cydriver.cuStreamGetPriority(cyhStream, &priority) if err != cydriver.CUDA_SUCCESS: @@ -38895,6 +39168,7 @@ def cuStreamGetDevice(hStream): -------- :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuGreenCtxStreamCreate`, :py:obj:`~.cuStreamGetFlags` """ + cdef CUdevice device cdef cydriver.CUstream cyhStream if hStream is None: phStream = 0 @@ -38903,7 +39177,7 @@ def cuStreamGetDevice(hStream): else: phStream = int(CUstream(hStream)) cyhStream = phStream - cdef CUdevice device = CUdevice() + device = CUdevice() with nogil: err = cydriver.cuStreamGetDevice(cyhStream, device._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -38940,6 +39214,7 @@ def cuStreamGetFlags(hStream): -------- :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuGreenCtxStreamCreate`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cudaStreamGetFlags`, :py:obj:`~.cuStreamGetDevice` """ + cdef unsigned int flags = 0 cdef cydriver.CUstream cyhStream if hStream is None: phStream = 0 @@ -38948,7 +39223,6 @@ def cuStreamGetFlags(hStream): else: phStream = int(CUstream(hStream)) cyhStream = phStream - cdef unsigned int flags = 0 with nogil: err = cydriver.cuStreamGetFlags(cyhStream, &flags) if err != cydriver.CUDA_SUCCESS: @@ -38997,6 +39271,7 @@ def cuStreamGetId(hStream): -------- :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cudaStreamGetId` """ + cdef unsigned long long streamId = 0 cdef cydriver.CUstream cyhStream if hStream is None: phStream = 0 @@ -39005,7 +39280,6 @@ def cuStreamGetId(hStream): else: phStream = int(CUstream(hStream)) cyhStream = phStream - cdef unsigned long long streamId = 0 with nogil: err = cydriver.cuStreamGetId(cyhStream, &streamId) if err != cydriver.CUDA_SUCCESS: @@ -39062,6 +39336,7 @@ def cuStreamGetCtx(hStream): -------- :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreateWithPriority`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cuStreamGetFlags`, :py:obj:`~.cuStreamGetDevice` :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags` """ + cdef CUcontext pctx cdef cydriver.CUstream cyhStream if hStream is None: phStream = 0 @@ -39070,7 +39345,7 @@ def cuStreamGetCtx(hStream): else: phStream = int(CUstream(hStream)) cyhStream = phStream - cdef CUcontext pctx = CUcontext() + pctx = CUcontext() with nogil: err = cydriver.cuStreamGetCtx(cyhStream, pctx._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -39139,6 +39414,8 @@ def cuStreamGetCtx_v2(hStream): -------- :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreate` :py:obj:`~.cuStreamCreateWithPriority`, :py:obj:`~.cuGreenCtxStreamCreate`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cuStreamGetFlags`, :py:obj:`~.cuStreamGetDevice`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`, """ + cdef CUgreenCtx pGreenCtx + cdef CUcontext pCtx cdef cydriver.CUstream cyhStream if hStream is None: phStream = 0 @@ -39147,8 +39424,8 @@ def cuStreamGetCtx_v2(hStream): else: phStream = int(CUstream(hStream)) cyhStream = phStream - cdef CUcontext pCtx = CUcontext() - cdef CUgreenCtx pGreenCtx = CUgreenCtx() + pCtx = CUcontext() + pGreenCtx = CUgreenCtx() with nogil: err = cydriver.cuStreamGetCtx_v2(cyhStream, pCtx._pvt_ptr, pGreenCtx._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -39195,13 +39472,6 @@ def cuStreamWaitEvent(hStream, hEvent, unsigned int Flags): :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuEventRecord`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cudaStreamWaitEvent` """ cdef cydriver.CUevent cyhEvent - if hEvent is None: - phEvent = 0 - elif isinstance(hEvent, (CUevent,)): - phEvent = int(hEvent) - else: - phEvent = int(CUevent(hEvent)) - cyhEvent = phEvent cdef cydriver.CUstream cyhStream if hStream is None: phStream = 0 @@ -39210,6 +39480,13 @@ def cuStreamWaitEvent(hStream, hEvent, unsigned int Flags): else: phStream = int(CUstream(hStream)) cyhStream = phStream + if hEvent is None: + phEvent = 0 + elif isinstance(hEvent, (CUevent,)): + phEvent = int(hEvent) + else: + phEvent = int(CUevent(hEvent)) + cyhEvent = phEvent with nogil: err = cydriver.cuStreamWaitEvent(cyhStream, cyhEvent, Flags) return (_CUresult(err),) @@ -39299,37 +39576,39 @@ def cuStreamAddCallback(hStream, callback, userData, unsigned int flags): ----- This function is slated for eventual deprecation and removal. If you do not require the callback to execute in case of a device error, consider using :py:obj:`~.cuLaunchHostFunc`. Additionally, this function is not supported with :py:obj:`~.cuStreamBeginCapture` and :py:obj:`~.cuStreamEndCapture`, unlike :py:obj:`~.cuLaunchHostFunc`. """ - cdef cydriver.CUstreamCallback cycallback - if callback is None: - pcallback = 0 - elif isinstance(callback, (CUstreamCallback,)): - pcallback = int(callback) - else: - pcallback = int(CUstreamCallback(callback)) - cycallback = pcallback - cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream cdef _HelperInputVoidPtrStruct cyuserDataHelper - cdef void* cyuserData = _helper_input_void_ptr(userData, &cyuserDataHelper) - + cdef void* cyuserData cdef cuStreamCallbackData *cbData = NULL - cbData = malloc(sizeof(cbData[0])) - if cbData == NULL: - return (CUresult.CUDA_ERROR_OUT_OF_MEMORY,) - cbData.callback = cycallback - cbData.userData = cyuserData - - with nogil: - err = cydriver.cuStreamAddCallback(cyhStream, cuStreamCallbackWrapper, cbData, flags) - if err != cydriver.CUDA_SUCCESS: - free(cbData) - _helper_input_void_ptr_free(&cyuserDataHelper) + cdef cydriver.CUstreamCallback cycallback + cdef cydriver.CUstream cyhStream + try: + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream + if callback is None: + pcallback = 0 + elif isinstance(callback, (CUstreamCallback,)): + pcallback = int(callback) + else: + pcallback = int(CUstreamCallback(callback)) + cycallback = pcallback + cyuserData = _helper_input_void_ptr(userData, &cyuserDataHelper) + cbData = malloc(sizeof(cbData[0])) + if cbData == NULL: + return (CUresult.CUDA_ERROR_OUT_OF_MEMORY,) + cbData.callback = cycallback + cbData.userData = cyuserData + + with nogil: + err = cydriver.cuStreamAddCallback(cyhStream, cuStreamCallbackWrapper, cbData, flags) + finally: + if err != cydriver.CUDA_SUCCESS: + free(cbData) + _helper_input_void_ptr_free(&cyuserDataHelper) return (_CUresult(err),) {{endif}} @@ -39375,6 +39654,7 @@ def cuStreamBeginCapture(hStream, mode not None : CUstreamCaptureMode): ----- Kernels captured using this API must not use texture and surface references. Reading or writing through any texture or surface reference is undefined behavior. This restriction does not apply to texture and surface objects. """ + cdef cydriver.CUstreamCaptureMode cymode cdef cydriver.CUstream cyhStream if hStream is None: phStream = 0 @@ -39383,7 +39663,7 @@ def cuStreamBeginCapture(hStream, mode not None : CUstreamCaptureMode): else: phStream = int(CUstream(hStream)) cyhStream = phStream - cdef cydriver.CUstreamCaptureMode cymode = int(mode) + cymode = int(mode) with nogil: err = cydriver.cuStreamBeginCapture(cyhStream, cymode) return (_CUresult(err),) @@ -39443,55 +39723,58 @@ def cuStreamBeginCaptureToGraph(hStream, hGraph, dependencies : Optional[tuple[C ----- Kernels captured using this API must not use texture and surface references. Reading or writing through any texture or surface reference is undefined behavior. This restriction does not apply to texture and surface objects. """ - dependencyData = [] if dependencyData is None else dependencyData - if not all(isinstance(_x, (CUgraphEdgeData,)) for _x in dependencyData): - raise TypeError("Argument 'dependencyData' is not instance of type (expected tuple[cydriver.CUgraphEdgeData,] or list[cydriver.CUgraphEdgeData,]") - dependencies = [] if dependencies is None else dependencies - if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): - raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") + cdef cydriver.CUstreamCaptureMode cymode + cdef cydriver.CUgraphEdgeData* cydependencyData = NULL + cdef cydriver.CUgraphNode* cydependencies = NULL cdef cydriver.CUgraph cyhGraph - if hGraph is None: - phGraph = 0 - elif isinstance(hGraph, (CUgraph,)): - phGraph = int(hGraph) - else: - phGraph = int(CUgraph(hGraph)) - cyhGraph = phGraph cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream - cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 1: - cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) - if cydependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) - else: - for idx in range(len(dependencies)): - cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] - elif len(dependencies) == 1: - cydependencies = (dependencies[0])._pvt_ptr - cdef cydriver.CUgraphEdgeData* cydependencyData = NULL - if len(dependencyData) > 1: - cydependencyData = calloc(len(dependencyData), sizeof(cydriver.CUgraphEdgeData)) - if cydependencyData is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cydriver.CUgraphEdgeData))) - for idx in range(len(dependencyData)): - string.memcpy(&cydependencyData[idx], (dependencyData[idx])._pvt_ptr, sizeof(cydriver.CUgraphEdgeData)) - elif len(dependencyData) == 1: - cydependencyData = (dependencyData[0])._pvt_ptr - if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) - cdef cydriver.CUstreamCaptureMode cymode = int(mode) - with nogil: - err = cydriver.cuStreamBeginCaptureToGraph(cyhStream, cyhGraph, cydependencies, cydependencyData, numDependencies, cymode) - if len(dependencies) > 1 and cydependencies is not NULL: - free(cydependencies) - if len(dependencyData) > 1 and cydependencyData is not NULL: - free(cydependencyData) + try: + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream + if hGraph is None: + phGraph = 0 + elif isinstance(hGraph, (CUgraph,)): + phGraph = int(hGraph) + else: + phGraph = int(CUgraph(hGraph)) + cyhGraph = phGraph + dependencies = [] if dependencies is None else dependencies + if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): + raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") + if len(dependencies) > 1: + cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) + if cydependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) + else: + for idx in range(len(dependencies)): + cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr + dependencyData = [] if dependencyData is None else dependencyData + if not all(isinstance(_x, (CUgraphEdgeData,)) for _x in dependencyData): + raise TypeError("Argument 'dependencyData' is not instance of type (expected tuple[cydriver.CUgraphEdgeData,] or list[cydriver.CUgraphEdgeData,]") + if len(dependencyData) > 1: + cydependencyData = calloc(len(dependencyData), sizeof(cydriver.CUgraphEdgeData)) + if cydependencyData is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cydriver.CUgraphEdgeData))) + for idx in range(len(dependencyData)): + string.memcpy(&cydependencyData[idx], (dependencyData[idx])._pvt_ptr, sizeof(cydriver.CUgraphEdgeData)) + elif len(dependencyData) == 1: + cydependencyData = (dependencyData[0])._pvt_ptr + if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) + cymode = int(mode) + with nogil: + err = cydriver.cuStreamBeginCaptureToGraph(cyhStream, cyhGraph, cydependencies, cydependencyData, numDependencies, cymode) + finally: + if len(dependencies) > 1 and cydependencies is not NULL: + free(cydependencies) + if len(dependencyData) > 1 and cydependencyData is not NULL: + free(cydependencyData) return (_CUresult(err),) {{endif}} @@ -39561,7 +39844,8 @@ def cuThreadExchangeStreamCaptureMode(mode not None : CUstreamCaptureMode): -------- :py:obj:`~.cuStreamBeginCapture` """ - cdef cydriver.CUstreamCaptureMode cymode = int(mode) + cdef cydriver.CUstreamCaptureMode cymode + cymode = int(mode) with nogil: err = cydriver.cuThreadExchangeStreamCaptureMode(&cymode) if err != cydriver.CUDA_SUCCESS: @@ -39601,6 +39885,7 @@ def cuStreamEndCapture(hStream): -------- :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamBeginCapture`, :py:obj:`~.cuStreamIsCapturing`, :py:obj:`~.cuGraphDestroy` """ + cdef CUgraph phGraph cdef cydriver.CUstream cyhStream if hStream is None: phStream = 0 @@ -39609,7 +39894,7 @@ def cuStreamEndCapture(hStream): else: phStream = int(CUstream(hStream)) cyhStream = phStream - cdef CUgraph phGraph = CUgraph() + phGraph = CUgraph() with nogil: err = cydriver.cuStreamEndCapture(cyhStream, phGraph._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -39664,6 +39949,7 @@ def cuStreamIsCapturing(hStream): -------- :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamBeginCapture`, :py:obj:`~.cuStreamEndCapture` """ + cdef cydriver.CUstreamCaptureStatus captureStatus cdef cydriver.CUstream cyhStream if hStream is None: phStream = 0 @@ -39672,7 +39958,6 @@ def cuStreamIsCapturing(hStream): else: phStream = int(CUstream(hStream)) cyhStream = phStream - cdef cydriver.CUstreamCaptureStatus captureStatus with nogil: err = cydriver.cuStreamIsCapturing(cyhStream, &captureStatus) if err != cydriver.CUDA_SUCCESS: @@ -39753,28 +40038,32 @@ def cuStreamGetCaptureInfo(hStream): -------- :py:obj:`~.cuStreamBeginCapture`, :py:obj:`~.cuStreamIsCapturing`, :py:obj:`~.cuStreamUpdateCaptureDependencies` """ - cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream - cdef cydriver.CUstreamCaptureStatus captureStatus_out - cdef cuuint64_t id_out = cuuint64_t() - cdef CUgraph graph_out = CUgraph() - cdef const cydriver.CUgraphNode* cydependencies_out = NULL - pydependencies_out = [] + cdef size_t numDependencies_out = 0 cdef const cydriver.CUgraphEdgeData* cyedgeData_out = NULL pyedgeData_out = [] - cdef size_t numDependencies_out = 0 - with nogil: - err = cydriver.cuStreamGetCaptureInfo(cyhStream, &captureStatus_out, id_out._pvt_ptr, graph_out._pvt_ptr, &cydependencies_out, &cyedgeData_out, &numDependencies_out) - if CUresult(err) == CUresult(0): - pydependencies_out = [CUgraphNode(init_value=cydependencies_out[idx]) for idx in range(numDependencies_out)] - if CUresult(err) == CUresult(0): - pyedgeData_out = [CUgraphEdgeData(_ptr=&cyedgeData_out[idx]) for idx in range(numDependencies_out)] + cdef const cydriver.CUgraphNode* cydependencies_out = NULL + pydependencies_out = [] + cdef CUgraph graph_out + cdef cuuint64_t id_out + cdef cydriver.CUstreamCaptureStatus captureStatus_out + cdef cydriver.CUstream cyhStream + try: + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream + id_out = cuuint64_t() + graph_out = CUgraph() + with nogil: + err = cydriver.cuStreamGetCaptureInfo(cyhStream, &captureStatus_out, id_out._pvt_ptr, graph_out._pvt_ptr, &cydependencies_out, &cyedgeData_out, &numDependencies_out) + finally: + if CUresult(err) == CUresult(0): + pydependencies_out = [CUgraphNode(init_value=cydependencies_out[idx]) for idx in range(numDependencies_out)] + if CUresult(err) == CUresult(0): + pyedgeData_out = [CUgraphEdgeData(_ptr=&cyedgeData_out[idx]) for idx in range(numDependencies_out)] if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None, None, None, None, None, None) return (_CUresult_SUCCESS, CUstreamCaptureStatus(captureStatus_out), id_out, graph_out, pydependencies_out, pyedgeData_out, numDependencies_out) @@ -39825,45 +40114,47 @@ def cuStreamUpdateCaptureDependencies(hStream, dependencies : Optional[tuple[CUg -------- :py:obj:`~.cuStreamBeginCapture`, :py:obj:`~.cuStreamGetCaptureInfo` """ - dependencyData = [] if dependencyData is None else dependencyData - if not all(isinstance(_x, (CUgraphEdgeData,)) for _x in dependencyData): - raise TypeError("Argument 'dependencyData' is not instance of type (expected tuple[cydriver.CUgraphEdgeData,] or list[cydriver.CUgraphEdgeData,]") - dependencies = [] if dependencies is None else dependencies - if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): - raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") - cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream - cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 1: - cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) - if cydependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) - else: - for idx in range(len(dependencies)): - cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] - elif len(dependencies) == 1: - cydependencies = (dependencies[0])._pvt_ptr cdef cydriver.CUgraphEdgeData* cydependencyData = NULL - if len(dependencyData) > 1: - cydependencyData = calloc(len(dependencyData), sizeof(cydriver.CUgraphEdgeData)) - if cydependencyData is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cydriver.CUgraphEdgeData))) - for idx in range(len(dependencyData)): - string.memcpy(&cydependencyData[idx], (dependencyData[idx])._pvt_ptr, sizeof(cydriver.CUgraphEdgeData)) - elif len(dependencyData) == 1: - cydependencyData = (dependencyData[0])._pvt_ptr - with nogil: - err = cydriver.cuStreamUpdateCaptureDependencies(cyhStream, cydependencies, cydependencyData, numDependencies, flags) - if len(dependencies) > 1 and cydependencies is not NULL: - free(cydependencies) - if len(dependencyData) > 1 and cydependencyData is not NULL: - free(cydependencyData) + cdef cydriver.CUgraphNode* cydependencies = NULL + cdef cydriver.CUstream cyhStream + try: + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream + dependencies = [] if dependencies is None else dependencies + if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): + raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") + if len(dependencies) > 1: + cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) + if cydependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) + else: + for idx in range(len(dependencies)): + cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr + dependencyData = [] if dependencyData is None else dependencyData + if not all(isinstance(_x, (CUgraphEdgeData,)) for _x in dependencyData): + raise TypeError("Argument 'dependencyData' is not instance of type (expected tuple[cydriver.CUgraphEdgeData,] or list[cydriver.CUgraphEdgeData,]") + if len(dependencyData) > 1: + cydependencyData = calloc(len(dependencyData), sizeof(cydriver.CUgraphEdgeData)) + if cydependencyData is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cydriver.CUgraphEdgeData))) + for idx in range(len(dependencyData)): + string.memcpy(&cydependencyData[idx], (dependencyData[idx])._pvt_ptr, sizeof(cydriver.CUgraphEdgeData)) + elif len(dependencyData) == 1: + cydependencyData = (dependencyData[0])._pvt_ptr + with nogil: + err = cydriver.cuStreamUpdateCaptureDependencies(cyhStream, cydependencies, cydependencyData, numDependencies, flags) + finally: + if len(dependencies) > 1 and cydependencies is not NULL: + free(cydependencies) + if len(dependencyData) > 1 and cydependencyData is not NULL: + free(cydependencyData) return (_CUresult(err),) {{endif}} @@ -39960,13 +40251,6 @@ def cuStreamAttachMemAsync(hStream, dptr, size_t length, unsigned int flags): :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuMemAllocManaged`, :py:obj:`~.cudaStreamAttachMemAsync` """ cdef cydriver.CUdeviceptr cydptr - if dptr is None: - pdptr = 0 - elif isinstance(dptr, (CUdeviceptr,)): - pdptr = int(dptr) - else: - pdptr = int(CUdeviceptr(dptr)) - cydptr = pdptr cdef cydriver.CUstream cyhStream if hStream is None: phStream = 0 @@ -39975,6 +40259,13 @@ def cuStreamAttachMemAsync(hStream, dptr, size_t length, unsigned int flags): else: phStream = int(CUstream(hStream)) cyhStream = phStream + if dptr is None: + pdptr = 0 + elif isinstance(dptr, (CUdeviceptr,)): + pdptr = int(dptr) + else: + pdptr = int(CUdeviceptr(dptr)) + cydptr = pdptr with nogil: err = cydriver.cuStreamAttachMemAsync(cyhStream, cydptr, length, flags) return (_CUresult(err),) @@ -40128,13 +40419,6 @@ def cuStreamCopyAttributes(dst, src): :py:obj:`~.CUaccessPolicyWindow` """ cdef cydriver.CUstream cysrc - if src is None: - psrc = 0 - elif isinstance(src, (CUstream,)): - psrc = int(src) - else: - psrc = int(CUstream(src)) - cysrc = psrc cdef cydriver.CUstream cydst if dst is None: pdst = 0 @@ -40143,6 +40427,13 @@ def cuStreamCopyAttributes(dst, src): else: pdst = int(CUstream(dst)) cydst = pdst + if src is None: + psrc = 0 + elif isinstance(src, (CUstream,)): + psrc = int(src) + else: + psrc = int(CUstream(src)) + cysrc = psrc with nogil: err = cydriver.cuStreamCopyAttributes(cydst, cysrc) return (_CUresult(err),) @@ -40175,6 +40466,8 @@ def cuStreamGetAttribute(hStream, attr not None : CUstreamAttrID): -------- :py:obj:`~.CUaccessPolicyWindow` """ + cdef CUstreamAttrValue value_out + cdef cydriver.CUstreamAttrID cyattr cdef cydriver.CUstream cyhStream if hStream is None: phStream = 0 @@ -40183,8 +40476,8 @@ def cuStreamGetAttribute(hStream, attr not None : CUstreamAttrID): else: phStream = int(CUstream(hStream)) cyhStream = phStream - cdef cydriver.CUstreamAttrID cyattr = int(attr) - cdef CUstreamAttrValue value_out = CUstreamAttrValue() + cyattr = int(attr) + value_out = CUstreamAttrValue() with nogil: err = cydriver.cuStreamGetAttribute(cyhStream, cyattr, value_out._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -40220,6 +40513,8 @@ def cuStreamSetAttribute(hStream, attr not None : CUstreamAttrID, value : Option -------- :py:obj:`~.CUaccessPolicyWindow` """ + cdef cydriver.CUstreamAttrValue* cyvalue_ptr + cdef cydriver.CUstreamAttrID cyattr cdef cydriver.CUstream cyhStream if hStream is None: phStream = 0 @@ -40228,8 +40523,8 @@ def cuStreamSetAttribute(hStream, attr not None : CUstreamAttrID, value : Option else: phStream = int(CUstream(hStream)) cyhStream = phStream - cdef cydriver.CUstreamAttrID cyattr = int(attr) - cdef cydriver.CUstreamAttrValue* cyvalue_ptr = value._pvt_ptr if value is not None else NULL + cyattr = int(attr) + cyvalue_ptr = value._pvt_ptr if value is not None else NULL with nogil: err = cydriver.cuStreamSetAttribute(cyhStream, cyattr, cyvalue_ptr) return (_CUresult(err),) @@ -40278,7 +40573,8 @@ def cuEventCreate(unsigned int Flags): -------- :py:obj:`~.cuEventRecord`, :py:obj:`~.cuEventQuery`, :py:obj:`~.cuEventSynchronize`, :py:obj:`~.cuEventDestroy`, :py:obj:`~.cuEventElapsedTime`, :py:obj:`~.cudaEventCreate`, :py:obj:`~.cudaEventCreateWithFlags` """ - cdef CUevent phEvent = CUevent() + cdef CUevent phEvent + phEvent = CUevent() with nogil: err = cydriver.cuEventCreate(phEvent._pvt_ptr, Flags) if err != cydriver.CUDA_SUCCESS: @@ -40326,13 +40622,6 @@ def cuEventRecord(hEvent, hStream): :py:obj:`~.cuEventCreate`, :py:obj:`~.cuEventQuery`, :py:obj:`~.cuEventSynchronize`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuEventDestroy`, :py:obj:`~.cuEventElapsedTime`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cuEventRecordWithFlags` """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream cdef cydriver.CUevent cyhEvent if hEvent is None: phEvent = 0 @@ -40341,6 +40630,13 @@ def cuEventRecord(hEvent, hStream): else: phEvent = int(CUevent(hEvent)) cyhEvent = phEvent + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream with nogil: err = cydriver.cuEventRecord(cyhEvent, cyhStream) return (_CUresult(err),) @@ -40396,13 +40692,6 @@ def cuEventRecordWithFlags(hEvent, hStream, unsigned int flags): :py:obj:`~.cuEventCreate`, :py:obj:`~.cuEventQuery`, :py:obj:`~.cuEventSynchronize`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuEventDestroy`, :py:obj:`~.cuEventElapsedTime`, :py:obj:`~.cuEventRecord`, :py:obj:`~.cudaEventRecord` """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream cdef cydriver.CUevent cyhEvent if hEvent is None: phEvent = 0 @@ -40411,6 +40700,13 @@ def cuEventRecordWithFlags(hEvent, hStream, unsigned int flags): else: phEvent = int(CUevent(hEvent)) cyhEvent = phEvent + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream with nogil: err = cydriver.cuEventRecordWithFlags(cyhEvent, cyhStream, flags) return (_CUresult(err),) @@ -40596,14 +40892,8 @@ def cuEventElapsedTime(hStart, hEnd): :py:obj:`~.cuEventCreate`, :py:obj:`~.cuEventRecord`, :py:obj:`~.cuEventQuery`, :py:obj:`~.cuEventSynchronize`, :py:obj:`~.cuEventDestroy`, :py:obj:`~.cudaEventElapsedTime` """ cdef cydriver.CUevent cyhEnd - if hEnd is None: - phEnd = 0 - elif isinstance(hEnd, (CUevent,)): - phEnd = int(hEnd) - else: - phEnd = int(CUevent(hEnd)) - cyhEnd = phEnd cdef cydriver.CUevent cyhStart + cdef float pMilliseconds = 0 if hStart is None: phStart = 0 elif isinstance(hStart, (CUevent,)): @@ -40611,7 +40901,13 @@ def cuEventElapsedTime(hStart, hEnd): else: phStart = int(CUevent(hStart)) cyhStart = phStart - cdef float pMilliseconds = 0 + if hEnd is None: + phEnd = 0 + elif isinstance(hEnd, (CUevent,)): + phEnd = int(hEnd) + else: + phEnd = int(CUevent(hEnd)) + cyhEnd = phEnd with nogil: err = cydriver.cuEventElapsedTime(&pMilliseconds, cyhStart, cyhEnd) if err != cydriver.CUDA_SUCCESS: @@ -40776,8 +41072,10 @@ def cuImportExternalMemory(memHandleDesc : Optional[CUDA_EXTERNAL_MEMORY_HANDLE_ If the Vulkan memory imported into CUDA is mapped on the CPU then the application must use vkInvalidateMappedMemoryRanges/vkFlushMappedMemoryRanges as well as appropriate Vulkan pipeline barriers to maintain coherence between CPU and GPU. For more information on these APIs, please refer to "Synchronization and Cache Control" chapter from Vulkan specification. """ - cdef CUexternalMemory extMem_out = CUexternalMemory() - cdef cydriver.CUDA_EXTERNAL_MEMORY_HANDLE_DESC* cymemHandleDesc_ptr = memHandleDesc._pvt_ptr if memHandleDesc is not None else NULL + cdef cydriver.CUDA_EXTERNAL_MEMORY_HANDLE_DESC* cymemHandleDesc_ptr + cdef CUexternalMemory extMem_out + extMem_out = CUexternalMemory() + cymemHandleDesc_ptr = memHandleDesc._pvt_ptr if memHandleDesc is not None else NULL with nogil: err = cydriver.cuImportExternalMemory(extMem_out._pvt_ptr, cymemHandleDesc_ptr) if err != cydriver.CUDA_SUCCESS: @@ -40838,7 +41136,10 @@ def cuExternalMemoryGetMappedBuffer(extMem, bufferDesc : Optional[CUDA_EXTERNAL_ -------- :py:obj:`~.cuImportExternalMemory`, :py:obj:`~.cuDestroyExternalMemory`, :py:obj:`~.cuExternalMemoryGetMappedMipmappedArray` """ + cdef cydriver.CUDA_EXTERNAL_MEMORY_BUFFER_DESC* cybufferDesc_ptr cdef cydriver.CUexternalMemory cyextMem + cdef CUdeviceptr devPtr + devPtr = CUdeviceptr() if extMem is None: pextMem = 0 elif isinstance(extMem, (CUexternalMemory,)): @@ -40846,8 +41147,7 @@ def cuExternalMemoryGetMappedBuffer(extMem, bufferDesc : Optional[CUDA_EXTERNAL_ else: pextMem = int(CUexternalMemory(extMem)) cyextMem = pextMem - cdef CUdeviceptr devPtr = CUdeviceptr() - cdef cydriver.CUDA_EXTERNAL_MEMORY_BUFFER_DESC* cybufferDesc_ptr = bufferDesc._pvt_ptr if bufferDesc is not None else NULL + cybufferDesc_ptr = bufferDesc._pvt_ptr if bufferDesc is not None else NULL with nogil: err = cydriver.cuExternalMemoryGetMappedBuffer(devPtr._pvt_ptr, cyextMem, cybufferDesc_ptr) if err != cydriver.CUDA_SUCCESS: @@ -40914,7 +41214,10 @@ def cuExternalMemoryGetMappedMipmappedArray(extMem, mipmapDesc : Optional[CUDA_E -------- :py:obj:`~.cuImportExternalMemory`, :py:obj:`~.cuDestroyExternalMemory`, :py:obj:`~.cuExternalMemoryGetMappedBuffer` """ + cdef cydriver.CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC* cymipmapDesc_ptr cdef cydriver.CUexternalMemory cyextMem + cdef CUmipmappedArray mipmap + mipmap = CUmipmappedArray() if extMem is None: pextMem = 0 elif isinstance(extMem, (CUexternalMemory,)): @@ -40922,8 +41225,7 @@ def cuExternalMemoryGetMappedMipmappedArray(extMem, mipmapDesc : Optional[CUDA_E else: pextMem = int(CUexternalMemory(extMem)) cyextMem = pextMem - cdef CUmipmappedArray mipmap = CUmipmappedArray() - cdef cydriver.CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC* cymipmapDesc_ptr = mipmapDesc._pvt_ptr if mipmapDesc is not None else NULL + cymipmapDesc_ptr = mipmapDesc._pvt_ptr if mipmapDesc is not None else NULL with nogil: err = cydriver.cuExternalMemoryGetMappedMipmappedArray(mipmap._pvt_ptr, cyextMem, cymipmapDesc_ptr) if err != cydriver.CUDA_SUCCESS: @@ -41111,8 +41413,10 @@ def cuImportExternalSemaphore(semHandleDesc : Optional[CUDA_EXTERNAL_SEMAPHORE_H -------- :py:obj:`~.cuDestroyExternalSemaphore`, :py:obj:`~.cuSignalExternalSemaphoresAsync`, :py:obj:`~.cuWaitExternalSemaphoresAsync` """ - cdef CUexternalSemaphore extSem_out = CUexternalSemaphore() - cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC* cysemHandleDesc_ptr = semHandleDesc._pvt_ptr if semHandleDesc is not None else NULL + cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC* cysemHandleDesc_ptr + cdef CUexternalSemaphore extSem_out + extSem_out = CUexternalSemaphore() + cysemHandleDesc_ptr = semHandleDesc._pvt_ptr if semHandleDesc is not None else NULL with nogil: err = cydriver.cuImportExternalSemaphore(extSem_out._pvt_ptr, cysemHandleDesc_ptr) if err != cydriver.CUDA_SUCCESS: @@ -41230,46 +41534,48 @@ def cuSignalExternalSemaphoresAsync(extSemArray : Optional[tuple[CUexternalSemap :py:obj:`~.cuImportExternalSemaphore`, :py:obj:`~.cuDestroyExternalSemaphore`, :py:obj:`~.cuWaitExternalSemaphoresAsync` """ cdef cydriver.CUstream cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (CUstream,)): - pstream = int(stream) - else: - pstream = int(CUstream(stream)) - cystream = pstream - paramsArray = [] if paramsArray is None else paramsArray - if not all(isinstance(_x, (CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS,)) for _x in paramsArray): - raise TypeError("Argument 'paramsArray' is not instance of type (expected tuple[cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS,] or list[cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS,]") - extSemArray = [] if extSemArray is None else extSemArray - if not all(isinstance(_x, (CUexternalSemaphore,)) for _x in extSemArray): - raise TypeError("Argument 'extSemArray' is not instance of type (expected tuple[cydriver.CUexternalSemaphore,] or list[cydriver.CUexternalSemaphore,]") - cdef cydriver.CUexternalSemaphore* cyextSemArray = NULL - if len(extSemArray) > 1: - cyextSemArray = calloc(len(extSemArray), sizeof(cydriver.CUexternalSemaphore)) - if cyextSemArray is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(extSemArray)) + 'x' + str(sizeof(cydriver.CUexternalSemaphore))) - else: - for idx in range(len(extSemArray)): - cyextSemArray[idx] = (extSemArray[idx])._pvt_ptr[0] - elif len(extSemArray) == 1: - cyextSemArray = (extSemArray[0])._pvt_ptr cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS* cyparamsArray = NULL - if len(paramsArray) > 1: - cyparamsArray = calloc(len(paramsArray), sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS)) - if cyparamsArray is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramsArray)) + 'x' + str(sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS))) - for idx in range(len(paramsArray)): - string.memcpy(&cyparamsArray[idx], (paramsArray[idx])._pvt_ptr, sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS)) - elif len(paramsArray) == 1: - cyparamsArray = (paramsArray[0])._pvt_ptr - if numExtSems > len(extSemArray): raise RuntimeError("List is too small: " + str(len(extSemArray)) + " < " + str(numExtSems)) - if numExtSems > len(paramsArray): raise RuntimeError("List is too small: " + str(len(paramsArray)) + " < " + str(numExtSems)) - with nogil: - err = cydriver.cuSignalExternalSemaphoresAsync(cyextSemArray, cyparamsArray, numExtSems, cystream) - if len(extSemArray) > 1 and cyextSemArray is not NULL: - free(cyextSemArray) - if len(paramsArray) > 1 and cyparamsArray is not NULL: - free(cyparamsArray) + cdef cydriver.CUexternalSemaphore* cyextSemArray = NULL + try: + extSemArray = [] if extSemArray is None else extSemArray + if not all(isinstance(_x, (CUexternalSemaphore,)) for _x in extSemArray): + raise TypeError("Argument 'extSemArray' is not instance of type (expected tuple[cydriver.CUexternalSemaphore,] or list[cydriver.CUexternalSemaphore,]") + if len(extSemArray) > 1: + cyextSemArray = calloc(len(extSemArray), sizeof(cydriver.CUexternalSemaphore)) + if cyextSemArray is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(extSemArray)) + 'x' + str(sizeof(cydriver.CUexternalSemaphore))) + else: + for idx in range(len(extSemArray)): + cyextSemArray[idx] = (extSemArray[idx])._pvt_ptr[0] + elif len(extSemArray) == 1: + cyextSemArray = (extSemArray[0])._pvt_ptr + paramsArray = [] if paramsArray is None else paramsArray + if not all(isinstance(_x, (CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS,)) for _x in paramsArray): + raise TypeError("Argument 'paramsArray' is not instance of type (expected tuple[cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS,] or list[cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS,]") + if len(paramsArray) > 1: + cyparamsArray = calloc(len(paramsArray), sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS)) + if cyparamsArray is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramsArray)) + 'x' + str(sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS))) + for idx in range(len(paramsArray)): + string.memcpy(&cyparamsArray[idx], (paramsArray[idx])._pvt_ptr, sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS)) + elif len(paramsArray) == 1: + cyparamsArray = (paramsArray[0])._pvt_ptr + if numExtSems > len(extSemArray): raise RuntimeError("List is too small: " + str(len(extSemArray)) + " < " + str(numExtSems)) + if numExtSems > len(paramsArray): raise RuntimeError("List is too small: " + str(len(paramsArray)) + " < " + str(numExtSems)) + if stream is None: + pstream = 0 + elif isinstance(stream, (CUstream,)): + pstream = int(stream) + else: + pstream = int(CUstream(stream)) + cystream = pstream + with nogil: + err = cydriver.cuSignalExternalSemaphoresAsync(cyextSemArray, cyparamsArray, numExtSems, cystream) + finally: + if len(extSemArray) > 1 and cyextSemArray is not NULL: + free(cyextSemArray) + if len(paramsArray) > 1 and cyparamsArray is not NULL: + free(cyparamsArray) return (_CUresult(err),) {{endif}} @@ -41360,46 +41666,48 @@ def cuWaitExternalSemaphoresAsync(extSemArray : Optional[tuple[CUexternalSemapho :py:obj:`~.cuImportExternalSemaphore`, :py:obj:`~.cuDestroyExternalSemaphore`, :py:obj:`~.cuSignalExternalSemaphoresAsync` """ cdef cydriver.CUstream cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (CUstream,)): - pstream = int(stream) - else: - pstream = int(CUstream(stream)) - cystream = pstream - paramsArray = [] if paramsArray is None else paramsArray - if not all(isinstance(_x, (CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS,)) for _x in paramsArray): - raise TypeError("Argument 'paramsArray' is not instance of type (expected tuple[cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS,] or list[cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS,]") - extSemArray = [] if extSemArray is None else extSemArray - if not all(isinstance(_x, (CUexternalSemaphore,)) for _x in extSemArray): - raise TypeError("Argument 'extSemArray' is not instance of type (expected tuple[cydriver.CUexternalSemaphore,] or list[cydriver.CUexternalSemaphore,]") - cdef cydriver.CUexternalSemaphore* cyextSemArray = NULL - if len(extSemArray) > 1: - cyextSemArray = calloc(len(extSemArray), sizeof(cydriver.CUexternalSemaphore)) - if cyextSemArray is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(extSemArray)) + 'x' + str(sizeof(cydriver.CUexternalSemaphore))) - else: - for idx in range(len(extSemArray)): - cyextSemArray[idx] = (extSemArray[idx])._pvt_ptr[0] - elif len(extSemArray) == 1: - cyextSemArray = (extSemArray[0])._pvt_ptr cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS* cyparamsArray = NULL - if len(paramsArray) > 1: - cyparamsArray = calloc(len(paramsArray), sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS)) - if cyparamsArray is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramsArray)) + 'x' + str(sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS))) - for idx in range(len(paramsArray)): - string.memcpy(&cyparamsArray[idx], (paramsArray[idx])._pvt_ptr, sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS)) - elif len(paramsArray) == 1: - cyparamsArray = (paramsArray[0])._pvt_ptr - if numExtSems > len(extSemArray): raise RuntimeError("List is too small: " + str(len(extSemArray)) + " < " + str(numExtSems)) - if numExtSems > len(paramsArray): raise RuntimeError("List is too small: " + str(len(paramsArray)) + " < " + str(numExtSems)) - with nogil: - err = cydriver.cuWaitExternalSemaphoresAsync(cyextSemArray, cyparamsArray, numExtSems, cystream) - if len(extSemArray) > 1 and cyextSemArray is not NULL: - free(cyextSemArray) - if len(paramsArray) > 1 and cyparamsArray is not NULL: - free(cyparamsArray) + cdef cydriver.CUexternalSemaphore* cyextSemArray = NULL + try: + extSemArray = [] if extSemArray is None else extSemArray + if not all(isinstance(_x, (CUexternalSemaphore,)) for _x in extSemArray): + raise TypeError("Argument 'extSemArray' is not instance of type (expected tuple[cydriver.CUexternalSemaphore,] or list[cydriver.CUexternalSemaphore,]") + if len(extSemArray) > 1: + cyextSemArray = calloc(len(extSemArray), sizeof(cydriver.CUexternalSemaphore)) + if cyextSemArray is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(extSemArray)) + 'x' + str(sizeof(cydriver.CUexternalSemaphore))) + else: + for idx in range(len(extSemArray)): + cyextSemArray[idx] = (extSemArray[idx])._pvt_ptr[0] + elif len(extSemArray) == 1: + cyextSemArray = (extSemArray[0])._pvt_ptr + paramsArray = [] if paramsArray is None else paramsArray + if not all(isinstance(_x, (CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS,)) for _x in paramsArray): + raise TypeError("Argument 'paramsArray' is not instance of type (expected tuple[cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS,] or list[cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS,]") + if len(paramsArray) > 1: + cyparamsArray = calloc(len(paramsArray), sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS)) + if cyparamsArray is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramsArray)) + 'x' + str(sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS))) + for idx in range(len(paramsArray)): + string.memcpy(&cyparamsArray[idx], (paramsArray[idx])._pvt_ptr, sizeof(cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS)) + elif len(paramsArray) == 1: + cyparamsArray = (paramsArray[0])._pvt_ptr + if numExtSems > len(extSemArray): raise RuntimeError("List is too small: " + str(len(extSemArray)) + " < " + str(numExtSems)) + if numExtSems > len(paramsArray): raise RuntimeError("List is too small: " + str(len(paramsArray)) + " < " + str(numExtSems)) + if stream is None: + pstream = 0 + elif isinstance(stream, (CUstream,)): + pstream = int(stream) + else: + pstream = int(CUstream(stream)) + cystream = pstream + with nogil: + err = cydriver.cuWaitExternalSemaphoresAsync(cyextSemArray, cyparamsArray, numExtSems, cystream) + finally: + if len(extSemArray) > 1 and cyextSemArray is not NULL: + free(cyextSemArray) + if len(paramsArray) > 1 and cyparamsArray is not NULL: + free(cyparamsArray) return (_CUresult(err),) {{endif}} @@ -41486,21 +41794,7 @@ def cuStreamWaitValue32(stream, addr, value, unsigned int flags): Warning: Improper use of this API may deadlock the application. Synchronization ordering established through this API is not visible to CUDA. CUDA tasks that are (even indirectly) ordered by this API should also have that order expressed with CUDA-visible dependencies such as events. This ensures that the scheduler does not serialize them in an improper order. """ cdef cydriver.cuuint32_t cyvalue - if value is None: - pvalue = 0 - elif isinstance(value, (cuuint32_t,)): - pvalue = int(value) - else: - pvalue = int(cuuint32_t(value)) - cyvalue = pvalue cdef cydriver.CUdeviceptr cyaddr - if addr is None: - paddr = 0 - elif isinstance(addr, (CUdeviceptr,)): - paddr = int(addr) - else: - paddr = int(CUdeviceptr(addr)) - cyaddr = paddr cdef cydriver.CUstream cystream if stream is None: pstream = 0 @@ -41509,6 +41803,20 @@ def cuStreamWaitValue32(stream, addr, value, unsigned int flags): else: pstream = int(CUstream(stream)) cystream = pstream + if addr is None: + paddr = 0 + elif isinstance(addr, (CUdeviceptr,)): + paddr = int(addr) + else: + paddr = int(CUdeviceptr(addr)) + cyaddr = paddr + if value is None: + pvalue = 0 + elif isinstance(value, (cuuint32_t,)): + pvalue = int(value) + else: + pvalue = int(cuuint32_t(value)) + cyvalue = pvalue with nogil: err = cydriver.cuStreamWaitValue32(cystream, cyaddr, cyvalue, flags) return (_CUresult(err),) @@ -41558,21 +41866,7 @@ def cuStreamWaitValue64(stream, addr, value, unsigned int flags): Warning: Improper use of this API may deadlock the application. Synchronization ordering established through this API is not visible to CUDA. CUDA tasks that are (even indirectly) ordered by this API should also have that order expressed with CUDA-visible dependencies such as events. This ensures that the scheduler does not serialize them in an improper order. """ cdef cydriver.cuuint64_t cyvalue - if value is None: - pvalue = 0 - elif isinstance(value, (cuuint64_t,)): - pvalue = int(value) - else: - pvalue = int(cuuint64_t(value)) - cyvalue = pvalue cdef cydriver.CUdeviceptr cyaddr - if addr is None: - paddr = 0 - elif isinstance(addr, (CUdeviceptr,)): - paddr = int(addr) - else: - paddr = int(CUdeviceptr(addr)) - cyaddr = paddr cdef cydriver.CUstream cystream if stream is None: pstream = 0 @@ -41581,6 +41875,20 @@ def cuStreamWaitValue64(stream, addr, value, unsigned int flags): else: pstream = int(CUstream(stream)) cystream = pstream + if addr is None: + paddr = 0 + elif isinstance(addr, (CUdeviceptr,)): + paddr = int(addr) + else: + paddr = int(CUdeviceptr(addr)) + cyaddr = paddr + if value is None: + pvalue = 0 + elif isinstance(value, (cuuint64_t,)): + pvalue = int(value) + else: + pvalue = int(cuuint64_t(value)) + cyvalue = pvalue with nogil: err = cydriver.cuStreamWaitValue64(cystream, cyaddr, cyvalue, flags) return (_CUresult(err),) @@ -41620,21 +41928,7 @@ def cuStreamWriteValue32(stream, addr, value, unsigned int flags): :py:obj:`~.cuStreamWriteValue64`, :py:obj:`~.cuStreamWaitValue32`, :py:obj:`~.cuStreamWaitValue64`, :py:obj:`~.cuStreamBatchMemOp`, :py:obj:`~.cuMemHostRegister`, :py:obj:`~.cuEventRecord` """ cdef cydriver.cuuint32_t cyvalue - if value is None: - pvalue = 0 - elif isinstance(value, (cuuint32_t,)): - pvalue = int(value) - else: - pvalue = int(cuuint32_t(value)) - cyvalue = pvalue cdef cydriver.CUdeviceptr cyaddr - if addr is None: - paddr = 0 - elif isinstance(addr, (CUdeviceptr,)): - paddr = int(addr) - else: - paddr = int(CUdeviceptr(addr)) - cyaddr = paddr cdef cydriver.CUstream cystream if stream is None: pstream = 0 @@ -41643,6 +41937,20 @@ def cuStreamWriteValue32(stream, addr, value, unsigned int flags): else: pstream = int(CUstream(stream)) cystream = pstream + if addr is None: + paddr = 0 + elif isinstance(addr, (CUdeviceptr,)): + paddr = int(addr) + else: + paddr = int(CUdeviceptr(addr)) + cyaddr = paddr + if value is None: + pvalue = 0 + elif isinstance(value, (cuuint32_t,)): + pvalue = int(value) + else: + pvalue = int(cuuint32_t(value)) + cyvalue = pvalue with nogil: err = cydriver.cuStreamWriteValue32(cystream, cyaddr, cyvalue, flags) return (_CUresult(err),) @@ -41684,21 +41992,7 @@ def cuStreamWriteValue64(stream, addr, value, unsigned int flags): :py:obj:`~.cuStreamWriteValue32`, :py:obj:`~.cuStreamWaitValue32`, :py:obj:`~.cuStreamWaitValue64`, :py:obj:`~.cuStreamBatchMemOp`, :py:obj:`~.cuMemHostRegister`, :py:obj:`~.cuEventRecord` """ cdef cydriver.cuuint64_t cyvalue - if value is None: - pvalue = 0 - elif isinstance(value, (cuuint64_t,)): - pvalue = int(value) - else: - pvalue = int(cuuint64_t(value)) - cyvalue = pvalue cdef cydriver.CUdeviceptr cyaddr - if addr is None: - paddr = 0 - elif isinstance(addr, (CUdeviceptr,)): - paddr = int(addr) - else: - paddr = int(CUdeviceptr(addr)) - cyaddr = paddr cdef cydriver.CUstream cystream if stream is None: pstream = 0 @@ -41707,6 +42001,20 @@ def cuStreamWriteValue64(stream, addr, value, unsigned int flags): else: pstream = int(CUstream(stream)) cystream = pstream + if addr is None: + paddr = 0 + elif isinstance(addr, (CUdeviceptr,)): + paddr = int(addr) + else: + paddr = int(CUdeviceptr(addr)) + cyaddr = paddr + if value is None: + pvalue = 0 + elif isinstance(value, (cuuint64_t,)): + pvalue = int(value) + else: + pvalue = int(cuuint64_t(value)) + cyvalue = pvalue with nogil: err = cydriver.cuStreamWriteValue64(cystream, cyaddr, cyvalue, flags) return (_CUresult(err),) @@ -41757,31 +42065,33 @@ def cuStreamBatchMemOp(stream, unsigned int count, paramArray : Optional[tuple[C ----- Warning: Improper use of this API may deadlock the application. Synchronization ordering established through this API is not visible to CUDA. CUDA tasks that are (even indirectly) ordered by this API should also have that order expressed with CUDA-visible dependencies such as events. This ensures that the scheduler does not serialize them in an improper order. """ - paramArray = [] if paramArray is None else paramArray - if not all(isinstance(_x, (CUstreamBatchMemOpParams,)) for _x in paramArray): - raise TypeError("Argument 'paramArray' is not instance of type (expected tuple[cydriver.CUstreamBatchMemOpParams,] or list[cydriver.CUstreamBatchMemOpParams,]") - cdef cydriver.CUstream cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (CUstream,)): - pstream = int(stream) - else: - pstream = int(CUstream(stream)) - cystream = pstream - if count > len(paramArray): raise RuntimeError("List is too small: " + str(len(paramArray)) + " < " + str(count)) cdef cydriver.CUstreamBatchMemOpParams* cyparamArray = NULL - if len(paramArray) > 1: - cyparamArray = calloc(len(paramArray), sizeof(cydriver.CUstreamBatchMemOpParams)) - if cyparamArray is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramArray)) + 'x' + str(sizeof(cydriver.CUstreamBatchMemOpParams))) - for idx in range(len(paramArray)): - string.memcpy(&cyparamArray[idx], (paramArray[idx])._pvt_ptr, sizeof(cydriver.CUstreamBatchMemOpParams)) - elif len(paramArray) == 1: - cyparamArray = (paramArray[0])._pvt_ptr - with nogil: - err = cydriver.cuStreamBatchMemOp(cystream, count, cyparamArray, flags) - if len(paramArray) > 1 and cyparamArray is not NULL: - free(cyparamArray) + cdef cydriver.CUstream cystream + try: + if stream is None: + pstream = 0 + elif isinstance(stream, (CUstream,)): + pstream = int(stream) + else: + pstream = int(CUstream(stream)) + cystream = pstream + if count > len(paramArray): raise RuntimeError("List is too small: " + str(len(paramArray)) + " < " + str(count)) + paramArray = [] if paramArray is None else paramArray + if not all(isinstance(_x, (CUstreamBatchMemOpParams,)) for _x in paramArray): + raise TypeError("Argument 'paramArray' is not instance of type (expected tuple[cydriver.CUstreamBatchMemOpParams,] or list[cydriver.CUstreamBatchMemOpParams,]") + if len(paramArray) > 1: + cyparamArray = calloc(len(paramArray), sizeof(cydriver.CUstreamBatchMemOpParams)) + if cyparamArray is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramArray)) + 'x' + str(sizeof(cydriver.CUstreamBatchMemOpParams))) + for idx in range(len(paramArray)): + string.memcpy(&cyparamArray[idx], (paramArray[idx])._pvt_ptr, sizeof(cydriver.CUstreamBatchMemOpParams)) + elif len(paramArray) == 1: + cyparamArray = (paramArray[0])._pvt_ptr + with nogil: + err = cydriver.cuStreamBatchMemOp(cystream, count, cyparamArray, flags) + finally: + if len(paramArray) > 1 and cyparamArray is not NULL: + free(cyparamArray) return (_CUresult(err),) {{endif}} @@ -41905,6 +42215,9 @@ def cuFuncGetAttribute(attrib not None : CUfunction_attribute, hfunc): :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cuLaunchKernel`, :py:obj:`~.cudaFuncGetAttributes`, :py:obj:`~.cudaFuncSetAttribute`, :py:obj:`~.cuFuncIsLoaded`, :py:obj:`~.cuFuncLoad`, :py:obj:`~.cuKernelGetAttribute` """ cdef cydriver.CUfunction cyhfunc + cdef cydriver.CUfunction_attribute cyattrib + cdef int pi = 0 + cyattrib = int(attrib) if hfunc is None: phfunc = 0 elif isinstance(hfunc, (CUfunction,)): @@ -41912,8 +42225,6 @@ def cuFuncGetAttribute(attrib not None : CUfunction_attribute, hfunc): else: phfunc = int(CUfunction(hfunc)) cyhfunc = phfunc - cdef int pi = 0 - cdef cydriver.CUfunction_attribute cyattrib = int(attrib) with nogil: err = cydriver.cuFuncGetAttribute(&pi, cyattrib, cyhfunc) if err != cydriver.CUDA_SUCCESS: @@ -42001,6 +42312,7 @@ def cuFuncSetAttribute(hfunc, attrib not None : CUfunction_attribute, int value) -------- :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cuLaunchKernel`, :py:obj:`~.cudaFuncGetAttributes`, :py:obj:`~.cudaFuncSetAttribute`, :py:obj:`~.cuKernelSetAttribute` """ + cdef cydriver.CUfunction_attribute cyattrib cdef cydriver.CUfunction cyhfunc if hfunc is None: phfunc = 0 @@ -42009,7 +42321,7 @@ def cuFuncSetAttribute(hfunc, attrib not None : CUfunction_attribute, int value) else: phfunc = int(CUfunction(hfunc)) cyhfunc = phfunc - cdef cydriver.CUfunction_attribute cyattrib = int(attrib) + cyattrib = int(attrib) with nogil: err = cydriver.cuFuncSetAttribute(cyhfunc, cyattrib, value) return (_CUresult(err),) @@ -42067,6 +42379,7 @@ def cuFuncSetCacheConfig(hfunc, config not None : CUfunc_cache): -------- :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuFuncGetAttribute`, :py:obj:`~.cuLaunchKernel`, :py:obj:`~.cudaFuncSetCacheConfig`, :py:obj:`~.cuKernelSetCacheConfig` """ + cdef cydriver.CUfunc_cache cyconfig cdef cydriver.CUfunction cyhfunc if hfunc is None: phfunc = 0 @@ -42075,7 +42388,7 @@ def cuFuncSetCacheConfig(hfunc, config not None : CUfunc_cache): else: phfunc = int(CUfunction(hfunc)) cyhfunc = phfunc - cdef cydriver.CUfunc_cache cyconfig = int(config) + cyconfig = int(config) with nogil: err = cydriver.cuFuncSetCacheConfig(cyhfunc, cyconfig) return (_CUresult(err),) @@ -42110,6 +42423,8 @@ def cuFuncGetModule(hfunc): Returned module handle """ cdef cydriver.CUfunction cyhfunc + cdef CUmodule hmod + hmod = CUmodule() if hfunc is None: phfunc = 0 elif isinstance(hfunc, (CUfunction,)): @@ -42117,7 +42432,6 @@ def cuFuncGetModule(hfunc): else: phfunc = int(CUfunction(hfunc)) cyhfunc = phfunc - cdef CUmodule hmod = CUmodule() with nogil: err = cydriver.cuFuncGetModule(hmod._pvt_ptr, cyhfunc) if err != cydriver.CUDA_SUCCESS: @@ -42152,6 +42466,7 @@ def cuFuncGetName(hfunc): The returned name of the function """ cdef cydriver.CUfunction cyhfunc + cdef const char* name = NULL if hfunc is None: phfunc = 0 elif isinstance(hfunc, (CUfunction,)): @@ -42159,7 +42474,6 @@ def cuFuncGetName(hfunc): else: phfunc = int(CUfunction(hfunc)) cyhfunc = phfunc - cdef const char* name = NULL with nogil: err = cydriver.cuFuncGetName(&name, cyhfunc) if err != cydriver.CUDA_SUCCESS: @@ -42205,6 +42519,8 @@ def cuFuncGetParamInfo(func, size_t paramIndex): -------- :py:obj:`~.cuKernelGetParamInfo` """ + cdef size_t paramSize = 0 + cdef size_t paramOffset = 0 cdef cydriver.CUfunction cyfunc if func is None: pfunc = 0 @@ -42213,8 +42529,6 @@ def cuFuncGetParamInfo(func, size_t paramIndex): else: pfunc = int(CUfunction(func)) cyfunc = pfunc - cdef size_t paramOffset = 0 - cdef size_t paramSize = 0 with nogil: err = cydriver.cuFuncGetParamInfo(cyfunc, paramIndex, ¶mOffset, ¶mSize) if err != cydriver.CUDA_SUCCESS: @@ -42247,6 +42561,7 @@ def cuFuncIsLoaded(function): :py:obj:`~.cuFuncLoad`, :py:obj:`~.cuModuleEnumerateFunctions` """ cdef cydriver.CUfunction cyfunction + cdef cydriver.CUfunctionLoadingState state if function is None: pfunction = 0 elif isinstance(function, (CUfunction,)): @@ -42254,7 +42569,6 @@ def cuFuncIsLoaded(function): else: pfunction = int(CUfunction(function)) cyfunction = pfunction - cdef cydriver.CUfunctionLoadingState state with nogil: err = cydriver.cuFuncIsLoaded(&state, cyfunction) if err != cydriver.CUDA_SUCCESS: @@ -42408,14 +42722,8 @@ def cuLaunchKernel(f, unsigned int gridDimX, unsigned int gridDimY, unsigned int -------- :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cuFuncGetAttribute`, :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cuLibraryGetKernel`, :py:obj:`~.cuKernelSetCacheConfig`, :py:obj:`~.cuKernelGetAttribute`, :py:obj:`~.cuKernelSetAttribute` """ + cdef void** cykernelParams_ptr cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream cdef cydriver.CUfunction cyf if f is None: pf = 0 @@ -42424,8 +42732,15 @@ def cuLaunchKernel(f, unsigned int gridDimX, unsigned int gridDimY, unsigned int else: pf = int(CUfunction(f)) cyf = pf + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream cykernelParams = _HelperKernelParams(kernelParams) - cdef void** cykernelParams_ptr = cykernelParams.ckernelParams + cykernelParams_ptr = cykernelParams.ckernelParams with nogil: err = cydriver.cuLaunchKernel(cyf, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, cyhStream, cykernelParams_ptr, extra) return (_CUresult(err),) @@ -42656,7 +42971,10 @@ def cuLaunchKernelEx(config : Optional[CUlaunchConfig], f, kernelParams, void_pt -------- :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cuFuncGetAttribute`, :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cudaLaunchKernelEx`, :py:obj:`~.cuLibraryGetKernel`, :py:obj:`~.cuKernelSetCacheConfig`, :py:obj:`~.cuKernelGetAttribute`, :py:obj:`~.cuKernelSetAttribute` """ + cdef void** cykernelParams_ptr cdef cydriver.CUfunction cyf + cdef cydriver.CUlaunchConfig* cyconfig_ptr + cyconfig_ptr = config._pvt_ptr if config is not None else NULL if f is None: pf = 0 elif isinstance(f, (CUfunction,)): @@ -42664,9 +42982,8 @@ def cuLaunchKernelEx(config : Optional[CUlaunchConfig], f, kernelParams, void_pt else: pf = int(CUfunction(f)) cyf = pf - cdef cydriver.CUlaunchConfig* cyconfig_ptr = config._pvt_ptr if config is not None else NULL cykernelParams = _HelperKernelParams(kernelParams) - cdef void** cykernelParams_ptr = cykernelParams.ckernelParams + cykernelParams_ptr = cykernelParams.ckernelParams with nogil: err = cydriver.cuLaunchKernelEx(cyconfig_ptr, cyf, cykernelParams_ptr, extra) return (_CUresult(err),) @@ -42762,14 +43079,8 @@ def cuLaunchCooperativeKernel(f, unsigned int gridDimX, unsigned int gridDimY, u -------- :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cuFuncGetAttribute`, :py:obj:`~.cuLaunchCooperativeKernelMultiDevice`, :py:obj:`~.cudaLaunchCooperativeKernel`, :py:obj:`~.cuLibraryGetKernel`, :py:obj:`~.cuKernelSetCacheConfig`, :py:obj:`~.cuKernelGetAttribute`, :py:obj:`~.cuKernelSetAttribute` """ + cdef void** cykernelParams_ptr cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream cdef cydriver.CUfunction cyf if f is None: pf = 0 @@ -42778,8 +43089,15 @@ def cuLaunchCooperativeKernel(f, unsigned int gridDimX, unsigned int gridDimY, u else: pf = int(CUfunction(f)) cyf = pf + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream cykernelParams = _HelperKernelParams(kernelParams) - cdef void** cykernelParams_ptr = cykernelParams.ckernelParams + cykernelParams_ptr = cykernelParams.ckernelParams with nogil: err = cydriver.cuLaunchCooperativeKernel(cyf, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, cyhStream, cykernelParams_ptr) return (_CUresult(err),) @@ -42939,23 +43257,25 @@ def cuLaunchCooperativeKernelMultiDevice(launchParamsList : Optional[tuple[CUDA_ -------- :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cuFuncGetAttribute`, :py:obj:`~.cuLaunchCooperativeKernel`, :py:obj:`~.cudaLaunchCooperativeKernelMultiDevice` """ - launchParamsList = [] if launchParamsList is None else launchParamsList - if not all(isinstance(_x, (CUDA_LAUNCH_PARAMS,)) for _x in launchParamsList): - raise TypeError("Argument 'launchParamsList' is not instance of type (expected tuple[cydriver.CUDA_LAUNCH_PARAMS,] or list[cydriver.CUDA_LAUNCH_PARAMS,]") cdef cydriver.CUDA_LAUNCH_PARAMS* cylaunchParamsList = NULL - if len(launchParamsList) > 1: - cylaunchParamsList = calloc(len(launchParamsList), sizeof(cydriver.CUDA_LAUNCH_PARAMS)) - if cylaunchParamsList is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(launchParamsList)) + 'x' + str(sizeof(cydriver.CUDA_LAUNCH_PARAMS))) - for idx in range(len(launchParamsList)): - string.memcpy(&cylaunchParamsList[idx], (launchParamsList[idx])._pvt_ptr, sizeof(cydriver.CUDA_LAUNCH_PARAMS)) - elif len(launchParamsList) == 1: - cylaunchParamsList = (launchParamsList[0])._pvt_ptr - if numDevices > len(launchParamsList): raise RuntimeError("List is too small: " + str(len(launchParamsList)) + " < " + str(numDevices)) - with nogil: - err = cydriver.cuLaunchCooperativeKernelMultiDevice(cylaunchParamsList, numDevices, flags) - if len(launchParamsList) > 1 and cylaunchParamsList is not NULL: - free(cylaunchParamsList) + try: + launchParamsList = [] if launchParamsList is None else launchParamsList + if not all(isinstance(_x, (CUDA_LAUNCH_PARAMS,)) for _x in launchParamsList): + raise TypeError("Argument 'launchParamsList' is not instance of type (expected tuple[cydriver.CUDA_LAUNCH_PARAMS,] or list[cydriver.CUDA_LAUNCH_PARAMS,]") + if len(launchParamsList) > 1: + cylaunchParamsList = calloc(len(launchParamsList), sizeof(cydriver.CUDA_LAUNCH_PARAMS)) + if cylaunchParamsList is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(launchParamsList)) + 'x' + str(sizeof(cydriver.CUDA_LAUNCH_PARAMS))) + for idx in range(len(launchParamsList)): + string.memcpy(&cylaunchParamsList[idx], (launchParamsList[idx])._pvt_ptr, sizeof(cydriver.CUDA_LAUNCH_PARAMS)) + elif len(launchParamsList) == 1: + cylaunchParamsList = (launchParamsList[0])._pvt_ptr + if numDevices > len(launchParamsList): raise RuntimeError("List is too small: " + str(len(launchParamsList)) + " < " + str(numDevices)) + with nogil: + err = cydriver.cuLaunchCooperativeKernelMultiDevice(cylaunchParamsList, numDevices, flags) + finally: + if len(launchParamsList) > 1 and cylaunchParamsList is not NULL: + free(cylaunchParamsList) return (_CUresult(err),) {{endif}} @@ -43035,37 +43355,39 @@ def cuLaunchHostFunc(hStream, fn, userData): -------- :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuMemAllocManaged`, :py:obj:`~.cuStreamAttachMemAsync`, :py:obj:`~.cuStreamAddCallback` """ - cdef cydriver.CUhostFn cyfn - if fn is None: - pfn = 0 - elif isinstance(fn, (CUhostFn,)): - pfn = int(fn) - else: - pfn = int(CUhostFn(fn)) - cyfn = pfn - cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream cdef _HelperInputVoidPtrStruct cyuserDataHelper - cdef void* cyuserData = _helper_input_void_ptr(userData, &cyuserDataHelper) - + cdef void* cyuserData cdef cuHostCallbackData *cbData = NULL - cbData = malloc(sizeof(cbData[0])) - if cbData == NULL: - return (CUresult.CUDA_ERROR_OUT_OF_MEMORY,) - cbData.callback = cyfn - cbData.userData = cyuserData - - with nogil: - err = cydriver.cuLaunchHostFunc(cyhStream, cuHostCallbackWrapper, cbData) - if err != cydriver.CUDA_SUCCESS: - free(cbData) - _helper_input_void_ptr_free(&cyuserDataHelper) + cdef cydriver.CUhostFn cyfn + cdef cydriver.CUstream cyhStream + try: + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream + if fn is None: + pfn = 0 + elif isinstance(fn, (CUhostFn,)): + pfn = int(fn) + else: + pfn = int(CUhostFn(fn)) + cyfn = pfn + cyuserData = _helper_input_void_ptr(userData, &cyuserDataHelper) + cbData = malloc(sizeof(cbData[0])) + if cbData == NULL: + return (CUresult.CUDA_ERROR_OUT_OF_MEMORY,) + cbData.callback = cyfn + cbData.userData = cyuserData + + with nogil: + err = cydriver.cuLaunchHostFunc(cyhStream, cuHostCallbackWrapper, cbData) + finally: + if err != cydriver.CUDA_SUCCESS: + free(cbData) + _helper_input_void_ptr_free(&cyuserDataHelper) return (_CUresult(err),) {{endif}} @@ -43312,19 +43634,22 @@ def cuParamSetv(hfunc, int offset, ptr, unsigned int numbytes): -------- :py:obj:`~.cuFuncSetBlockShape`, :py:obj:`~.cuFuncSetSharedSize`, :py:obj:`~.cuFuncGetAttribute`, :py:obj:`~.cuParamSetSize`, :py:obj:`~.cuParamSetf`, :py:obj:`~.cuParamSeti`, :py:obj:`~.cuLaunch`, :py:obj:`~.cuLaunchGrid`, :py:obj:`~.cuLaunchGridAsync`, :py:obj:`~.cuLaunchKernel` """ - cdef cydriver.CUfunction cyhfunc - if hfunc is None: - phfunc = 0 - elif isinstance(hfunc, (CUfunction,)): - phfunc = int(hfunc) - else: - phfunc = int(CUfunction(hfunc)) - cyhfunc = phfunc cdef _HelperInputVoidPtrStruct cyptrHelper - cdef void* cyptr = _helper_input_void_ptr(ptr, &cyptrHelper) - with nogil: - err = cydriver.cuParamSetv(cyhfunc, offset, cyptr, numbytes) - _helper_input_void_ptr_free(&cyptrHelper) + cdef void* cyptr + cdef cydriver.CUfunction cyhfunc + try: + if hfunc is None: + phfunc = 0 + elif isinstance(hfunc, (CUfunction,)): + phfunc = int(hfunc) + else: + phfunc = int(CUfunction(hfunc)) + cyhfunc = phfunc + cyptr = _helper_input_void_ptr(ptr, &cyptrHelper) + with nogil: + err = cydriver.cuParamSetv(cyhfunc, offset, cyptr, numbytes) + finally: + _helper_input_void_ptr_free(&cyptrHelper) return (_CUresult(err),) {{endif}} @@ -43485,13 +43810,6 @@ def cuLaunchGridAsync(f, int grid_width, int grid_height, hStream): In certain cases where cubins are created with no ABI (i.e., using `ptxas` `None` `no`), this function may serialize kernel launches. The CUDA driver retains asynchronous behavior by growing the per-thread stack as needed per launch and not shrinking it afterwards. """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream cdef cydriver.CUfunction cyf if f is None: pf = 0 @@ -43500,6 +43818,13 @@ def cuLaunchGridAsync(f, int grid_width, int grid_height, hStream): else: pf = int(CUfunction(f)) cyf = pf + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream with nogil: err = cydriver.cuLaunchGridAsync(cyf, grid_width, grid_height, cyhStream) return (_CUresult(err),) @@ -43534,13 +43859,6 @@ def cuParamSetTexRef(hfunc, int texunit, hTexRef): :py:obj:`~.CUDA_SUCCESS`, :py:obj:`~.CUDA_ERROR_DEINITIALIZED`, :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED`, :py:obj:`~.CUDA_ERROR_INVALID_CONTEXT`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE` """ cdef cydriver.CUtexref cyhTexRef - if hTexRef is None: - phTexRef = 0 - elif isinstance(hTexRef, (CUtexref,)): - phTexRef = int(hTexRef) - else: - phTexRef = int(CUtexref(hTexRef)) - cyhTexRef = phTexRef cdef cydriver.CUfunction cyhfunc if hfunc is None: phfunc = 0 @@ -43549,6 +43867,13 @@ def cuParamSetTexRef(hfunc, int texunit, hTexRef): else: phfunc = int(CUfunction(hfunc)) cyhfunc = phfunc + if hTexRef is None: + phTexRef = 0 + elif isinstance(hTexRef, (CUtexref,)): + phTexRef = int(hTexRef) + else: + phTexRef = int(CUtexref(hTexRef)) + cyhTexRef = phTexRef with nogil: err = cydriver.cuParamSetTexRef(cyhfunc, texunit, cyhTexRef) return (_CUresult(err),) @@ -43613,6 +43938,7 @@ def cuFuncSetSharedMemConfig(hfunc, config not None : CUsharedconfig): -------- :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuCtxGetSharedMemConfig`, :py:obj:`~.cuCtxSetSharedMemConfig`, :py:obj:`~.cuFuncGetAttribute`, :py:obj:`~.cuLaunchKernel`, :py:obj:`~.cudaFuncSetSharedMemConfig` """ + cdef cydriver.CUsharedconfig cyconfig cdef cydriver.CUfunction cyhfunc if hfunc is None: phfunc = 0 @@ -43621,7 +43947,7 @@ def cuFuncSetSharedMemConfig(hfunc, config not None : CUsharedconfig): else: phfunc = int(CUfunction(hfunc)) cyhfunc = phfunc - cdef cydriver.CUsharedconfig cyconfig = int(config) + cyconfig = int(config) with nogil: err = cydriver.cuFuncSetSharedMemConfig(cyhfunc, cyconfig) return (_CUresult(err),) @@ -43651,7 +43977,8 @@ def cuGraphCreate(unsigned int flags): -------- :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddHostNode`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphAddMemsetNode`, :py:obj:`~.cuGraphInstantiate`, :py:obj:`~.cuGraphDestroy`, :py:obj:`~.cuGraphGetNodes`, :py:obj:`~.cuGraphGetRootNodes`, :py:obj:`~.cuGraphGetEdges`, :py:obj:`~.cuGraphClone` """ - cdef CUgraph phGraph = CUgraph() + cdef CUgraph phGraph + phGraph = CUgraph() with nogil: err = cydriver.cuGraphCreate(phGraph._pvt_ptr, flags) if err != cydriver.CUDA_SUCCESS: @@ -43750,34 +44077,38 @@ def cuGraphAddKernelNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | li ----- Kernels launched using graphs must not use texture and surface references. Reading or writing through any texture or surface reference is undefined behavior. This restriction does not apply to texture and surface objects. """ - dependencies = [] if dependencies is None else dependencies - if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): - raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") - cdef cydriver.CUgraph cyhGraph - if hGraph is None: - phGraph = 0 - elif isinstance(hGraph, (CUgraph,)): - phGraph = int(hGraph) - else: - phGraph = int(CUgraph(hGraph)) - cyhGraph = phGraph - cdef CUgraphNode phGraphNode = CUgraphNode() + cdef cydriver.CUDA_KERNEL_NODE_PARAMS* cynodeParams_ptr cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 1: - cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) - if cydependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) - else: - for idx in range(len(dependencies)): - cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] - elif len(dependencies) == 1: - cydependencies = (dependencies[0])._pvt_ptr - if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) - cdef cydriver.CUDA_KERNEL_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL - with nogil: - err = cydriver.cuGraphAddKernelNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cynodeParams_ptr) - if len(dependencies) > 1 and cydependencies is not NULL: - free(cydependencies) + cdef cydriver.CUgraph cyhGraph + cdef CUgraphNode phGraphNode + try: + phGraphNode = CUgraphNode() + if hGraph is None: + phGraph = 0 + elif isinstance(hGraph, (CUgraph,)): + phGraph = int(hGraph) + else: + phGraph = int(CUgraph(hGraph)) + cyhGraph = phGraph + dependencies = [] if dependencies is None else dependencies + if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): + raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") + if len(dependencies) > 1: + cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) + if cydependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) + else: + for idx in range(len(dependencies)): + cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr + if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + with nogil: + err = cydriver.cuGraphAddKernelNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cynodeParams_ptr) + finally: + if len(dependencies) > 1 and cydependencies is not NULL: + free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, phGraphNode) @@ -43816,6 +44147,7 @@ def cuGraphKernelNodeGetParams(hNode): -------- :py:obj:`~.cuLaunchKernel`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphKernelNodeSetParams` """ + cdef CUDA_KERNEL_NODE_PARAMS nodeParams cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -43824,7 +44156,7 @@ def cuGraphKernelNodeGetParams(hNode): else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef CUDA_KERNEL_NODE_PARAMS nodeParams = CUDA_KERNEL_NODE_PARAMS() + nodeParams = CUDA_KERNEL_NODE_PARAMS() with nogil: err = cydriver.cuGraphKernelNodeGetParams(cyhNode, nodeParams._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -43856,6 +44188,7 @@ def cuGraphKernelNodeSetParams(hNode, nodeParams : Optional[CUDA_KERNEL_NODE_PAR -------- :py:obj:`~.cuGraphNodeSetParams`, :py:obj:`~.cuLaunchKernel`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphKernelNodeGetParams` """ + cdef cydriver.CUDA_KERNEL_NODE_PARAMS* cynodeParams_ptr cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -43864,7 +44197,7 @@ def cuGraphKernelNodeSetParams(hNode, nodeParams : Optional[CUDA_KERNEL_NODE_PAR else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef cydriver.CUDA_KERNEL_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL with nogil: err = cydriver.cuGraphKernelNodeSetParams(cyhNode, cynodeParams_ptr) return (_CUresult(err),) @@ -43921,41 +44254,45 @@ def cuGraphAddMemcpyNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | li :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuGraphMemcpyNodeGetParams`, :py:obj:`~.cuGraphMemcpyNodeSetParams`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphDestroyNode`, :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddHostNode`, :py:obj:`~.cuGraphAddMemsetNode` """ cdef cydriver.CUcontext cyctx - if ctx is None: - pctx = 0 - elif isinstance(ctx, (CUcontext,)): - pctx = int(ctx) - else: - pctx = int(CUcontext(ctx)) - cyctx = pctx - dependencies = [] if dependencies is None else dependencies - if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): - raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") - cdef cydriver.CUgraph cyhGraph - if hGraph is None: - phGraph = 0 - elif isinstance(hGraph, (CUgraph,)): - phGraph = int(hGraph) - else: - phGraph = int(CUgraph(hGraph)) - cyhGraph = phGraph - cdef CUgraphNode phGraphNode = CUgraphNode() + cdef cydriver.CUDA_MEMCPY3D* cycopyParams_ptr cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 1: - cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) - if cydependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) - else: - for idx in range(len(dependencies)): - cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] - elif len(dependencies) == 1: - cydependencies = (dependencies[0])._pvt_ptr - if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) - cdef cydriver.CUDA_MEMCPY3D* cycopyParams_ptr = copyParams._pvt_ptr if copyParams is not None else NULL - with nogil: - err = cydriver.cuGraphAddMemcpyNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cycopyParams_ptr, cyctx) - if len(dependencies) > 1 and cydependencies is not NULL: - free(cydependencies) + cdef cydriver.CUgraph cyhGraph + cdef CUgraphNode phGraphNode + try: + phGraphNode = CUgraphNode() + if hGraph is None: + phGraph = 0 + elif isinstance(hGraph, (CUgraph,)): + phGraph = int(hGraph) + else: + phGraph = int(CUgraph(hGraph)) + cyhGraph = phGraph + dependencies = [] if dependencies is None else dependencies + if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): + raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") + if len(dependencies) > 1: + cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) + if cydependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) + else: + for idx in range(len(dependencies)): + cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr + if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) + cycopyParams_ptr = copyParams._pvt_ptr if copyParams is not None else NULL + if ctx is None: + pctx = 0 + elif isinstance(ctx, (CUcontext,)): + pctx = int(ctx) + else: + pctx = int(CUcontext(ctx)) + cyctx = pctx + with nogil: + err = cydriver.cuGraphAddMemcpyNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cycopyParams_ptr, cyctx) + finally: + if len(dependencies) > 1 and cydependencies is not NULL: + free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, phGraphNode) @@ -43985,6 +44322,7 @@ def cuGraphMemcpyNodeGetParams(hNode): -------- :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphMemcpyNodeSetParams` """ + cdef CUDA_MEMCPY3D nodeParams cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -43993,7 +44331,7 @@ def cuGraphMemcpyNodeGetParams(hNode): else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef CUDA_MEMCPY3D nodeParams = CUDA_MEMCPY3D() + nodeParams = CUDA_MEMCPY3D() with nogil: err = cydriver.cuGraphMemcpyNodeGetParams(cyhNode, nodeParams._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -44025,6 +44363,7 @@ def cuGraphMemcpyNodeSetParams(hNode, nodeParams : Optional[CUDA_MEMCPY3D]): -------- :py:obj:`~.cuGraphNodeSetParams`, :py:obj:`~.cuMemcpy3D`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphMemcpyNodeGetParams` """ + cdef cydriver.CUDA_MEMCPY3D* cynodeParams_ptr cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -44033,7 +44372,7 @@ def cuGraphMemcpyNodeSetParams(hNode, nodeParams : Optional[CUDA_MEMCPY3D]): else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef cydriver.CUDA_MEMCPY3D* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL with nogil: err = cydriver.cuGraphMemcpyNodeSetParams(cyhNode, cynodeParams_ptr) return (_CUresult(err),) @@ -44080,41 +44419,45 @@ def cuGraphAddMemsetNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | li :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuGraphMemsetNodeGetParams`, :py:obj:`~.cuGraphMemsetNodeSetParams`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphDestroyNode`, :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddHostNode`, :py:obj:`~.cuGraphAddMemcpyNode` """ cdef cydriver.CUcontext cyctx - if ctx is None: - pctx = 0 - elif isinstance(ctx, (CUcontext,)): - pctx = int(ctx) - else: - pctx = int(CUcontext(ctx)) - cyctx = pctx - dependencies = [] if dependencies is None else dependencies - if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): - raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") - cdef cydriver.CUgraph cyhGraph - if hGraph is None: - phGraph = 0 - elif isinstance(hGraph, (CUgraph,)): - phGraph = int(hGraph) - else: - phGraph = int(CUgraph(hGraph)) - cyhGraph = phGraph - cdef CUgraphNode phGraphNode = CUgraphNode() + cdef cydriver.CUDA_MEMSET_NODE_PARAMS* cymemsetParams_ptr cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 1: - cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) - if cydependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) - else: - for idx in range(len(dependencies)): - cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] - elif len(dependencies) == 1: - cydependencies = (dependencies[0])._pvt_ptr - if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) - cdef cydriver.CUDA_MEMSET_NODE_PARAMS* cymemsetParams_ptr = memsetParams._pvt_ptr if memsetParams is not None else NULL - with nogil: - err = cydriver.cuGraphAddMemsetNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cymemsetParams_ptr, cyctx) - if len(dependencies) > 1 and cydependencies is not NULL: - free(cydependencies) + cdef cydriver.CUgraph cyhGraph + cdef CUgraphNode phGraphNode + try: + phGraphNode = CUgraphNode() + if hGraph is None: + phGraph = 0 + elif isinstance(hGraph, (CUgraph,)): + phGraph = int(hGraph) + else: + phGraph = int(CUgraph(hGraph)) + cyhGraph = phGraph + dependencies = [] if dependencies is None else dependencies + if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): + raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") + if len(dependencies) > 1: + cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) + if cydependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) + else: + for idx in range(len(dependencies)): + cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr + if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) + cymemsetParams_ptr = memsetParams._pvt_ptr if memsetParams is not None else NULL + if ctx is None: + pctx = 0 + elif isinstance(ctx, (CUcontext,)): + pctx = int(ctx) + else: + pctx = int(CUcontext(ctx)) + cyctx = pctx + with nogil: + err = cydriver.cuGraphAddMemsetNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cymemsetParams_ptr, cyctx) + finally: + if len(dependencies) > 1 and cydependencies is not NULL: + free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, phGraphNode) @@ -44144,6 +44487,7 @@ def cuGraphMemsetNodeGetParams(hNode): -------- :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuGraphAddMemsetNode`, :py:obj:`~.cuGraphMemsetNodeSetParams` """ + cdef CUDA_MEMSET_NODE_PARAMS nodeParams cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -44152,7 +44496,7 @@ def cuGraphMemsetNodeGetParams(hNode): else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef CUDA_MEMSET_NODE_PARAMS nodeParams = CUDA_MEMSET_NODE_PARAMS() + nodeParams = CUDA_MEMSET_NODE_PARAMS() with nogil: err = cydriver.cuGraphMemsetNodeGetParams(cyhNode, nodeParams._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -44184,6 +44528,7 @@ def cuGraphMemsetNodeSetParams(hNode, nodeParams : Optional[CUDA_MEMSET_NODE_PAR -------- :py:obj:`~.cuGraphNodeSetParams`, :py:obj:`~.cuMemsetD2D32`, :py:obj:`~.cuGraphAddMemsetNode`, :py:obj:`~.cuGraphMemsetNodeGetParams` """ + cdef cydriver.CUDA_MEMSET_NODE_PARAMS* cynodeParams_ptr cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -44192,7 +44537,7 @@ def cuGraphMemsetNodeSetParams(hNode, nodeParams : Optional[CUDA_MEMSET_NODE_PAR else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef cydriver.CUDA_MEMSET_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL with nogil: err = cydriver.cuGraphMemsetNodeSetParams(cyhNode, cynodeParams_ptr) return (_CUresult(err),) @@ -44236,34 +44581,38 @@ def cuGraphAddHostNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | list -------- :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuLaunchHostFunc`, :py:obj:`~.cuGraphHostNodeGetParams`, :py:obj:`~.cuGraphHostNodeSetParams`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphDestroyNode`, :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphAddMemsetNode` """ - dependencies = [] if dependencies is None else dependencies - if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): - raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") - cdef cydriver.CUgraph cyhGraph - if hGraph is None: - phGraph = 0 - elif isinstance(hGraph, (CUgraph,)): - phGraph = int(hGraph) - else: - phGraph = int(CUgraph(hGraph)) - cyhGraph = phGraph - cdef CUgraphNode phGraphNode = CUgraphNode() + cdef cydriver.CUDA_HOST_NODE_PARAMS* cynodeParams_ptr cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 1: - cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) - if cydependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) - else: - for idx in range(len(dependencies)): - cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] - elif len(dependencies) == 1: - cydependencies = (dependencies[0])._pvt_ptr - if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) - cdef cydriver.CUDA_HOST_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL - with nogil: - err = cydriver.cuGraphAddHostNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cynodeParams_ptr) - if len(dependencies) > 1 and cydependencies is not NULL: - free(cydependencies) + cdef cydriver.CUgraph cyhGraph + cdef CUgraphNode phGraphNode + try: + phGraphNode = CUgraphNode() + if hGraph is None: + phGraph = 0 + elif isinstance(hGraph, (CUgraph,)): + phGraph = int(hGraph) + else: + phGraph = int(CUgraph(hGraph)) + cyhGraph = phGraph + dependencies = [] if dependencies is None else dependencies + if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): + raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") + if len(dependencies) > 1: + cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) + if cydependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) + else: + for idx in range(len(dependencies)): + cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr + if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + with nogil: + err = cydriver.cuGraphAddHostNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cynodeParams_ptr) + finally: + if len(dependencies) > 1 and cydependencies is not NULL: + free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, phGraphNode) @@ -44293,6 +44642,7 @@ def cuGraphHostNodeGetParams(hNode): -------- :py:obj:`~.cuLaunchHostFunc`, :py:obj:`~.cuGraphAddHostNode`, :py:obj:`~.cuGraphHostNodeSetParams` """ + cdef CUDA_HOST_NODE_PARAMS nodeParams cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -44301,7 +44651,7 @@ def cuGraphHostNodeGetParams(hNode): else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef CUDA_HOST_NODE_PARAMS nodeParams = CUDA_HOST_NODE_PARAMS() + nodeParams = CUDA_HOST_NODE_PARAMS() with nogil: err = cydriver.cuGraphHostNodeGetParams(cyhNode, nodeParams._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -44333,6 +44683,7 @@ def cuGraphHostNodeSetParams(hNode, nodeParams : Optional[CUDA_HOST_NODE_PARAMS] -------- :py:obj:`~.cuGraphNodeSetParams`, :py:obj:`~.cuLaunchHostFunc`, :py:obj:`~.cuGraphAddHostNode`, :py:obj:`~.cuGraphHostNodeGetParams` """ + cdef cydriver.CUDA_HOST_NODE_PARAMS* cynodeParams_ptr cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -44341,7 +44692,7 @@ def cuGraphHostNodeSetParams(hNode, nodeParams : Optional[CUDA_HOST_NODE_PARAMS] else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef cydriver.CUDA_HOST_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL with nogil: err = cydriver.cuGraphHostNodeSetParams(cyhNode, cynodeParams_ptr) return (_CUresult(err),) @@ -44389,40 +44740,43 @@ def cuGraphAddChildGraphNode(hGraph, dependencies : Optional[tuple[CUgraphNode] :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuGraphChildGraphNodeGetGraph`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphDestroyNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddHostNode`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphAddMemsetNode`, :py:obj:`~.cuGraphClone` """ cdef cydriver.CUgraph cychildGraph - if childGraph is None: - pchildGraph = 0 - elif isinstance(childGraph, (CUgraph,)): - pchildGraph = int(childGraph) - else: - pchildGraph = int(CUgraph(childGraph)) - cychildGraph = pchildGraph - dependencies = [] if dependencies is None else dependencies - if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): - raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") - cdef cydriver.CUgraph cyhGraph - if hGraph is None: - phGraph = 0 - elif isinstance(hGraph, (CUgraph,)): - phGraph = int(hGraph) - else: - phGraph = int(CUgraph(hGraph)) - cyhGraph = phGraph - cdef CUgraphNode phGraphNode = CUgraphNode() cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 1: - cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) - if cydependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) - else: - for idx in range(len(dependencies)): - cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] - elif len(dependencies) == 1: - cydependencies = (dependencies[0])._pvt_ptr - if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) - with nogil: - err = cydriver.cuGraphAddChildGraphNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cychildGraph) - if len(dependencies) > 1 and cydependencies is not NULL: - free(cydependencies) + cdef cydriver.CUgraph cyhGraph + cdef CUgraphNode phGraphNode + try: + phGraphNode = CUgraphNode() + if hGraph is None: + phGraph = 0 + elif isinstance(hGraph, (CUgraph,)): + phGraph = int(hGraph) + else: + phGraph = int(CUgraph(hGraph)) + cyhGraph = phGraph + dependencies = [] if dependencies is None else dependencies + if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): + raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") + if len(dependencies) > 1: + cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) + if cydependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) + else: + for idx in range(len(dependencies)): + cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr + if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) + if childGraph is None: + pchildGraph = 0 + elif isinstance(childGraph, (CUgraph,)): + pchildGraph = int(childGraph) + else: + pchildGraph = int(CUgraph(childGraph)) + cychildGraph = pchildGraph + with nogil: + err = cydriver.cuGraphAddChildGraphNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cychildGraph) + finally: + if len(dependencies) > 1 and cydependencies is not NULL: + free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, phGraphNode) @@ -44457,6 +44811,7 @@ def cuGraphChildGraphNodeGetGraph(hNode): -------- :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphNodeFindInClone` """ + cdef CUgraph phGraph cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -44465,7 +44820,7 @@ def cuGraphChildGraphNodeGetGraph(hNode): else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef CUgraph phGraph = CUgraph() + phGraph = CUgraph() with nogil: err = cydriver.cuGraphChildGraphNodeGetGraph(cyhNode, phGraph._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -44512,33 +44867,36 @@ def cuGraphAddEmptyNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | lis -------- :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphDestroyNode`, :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddHostNode`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphAddMemsetNode` """ - dependencies = [] if dependencies is None else dependencies - if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): - raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") - cdef cydriver.CUgraph cyhGraph - if hGraph is None: - phGraph = 0 - elif isinstance(hGraph, (CUgraph,)): - phGraph = int(hGraph) - else: - phGraph = int(CUgraph(hGraph)) - cyhGraph = phGraph - cdef CUgraphNode phGraphNode = CUgraphNode() cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 1: - cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) - if cydependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) - else: - for idx in range(len(dependencies)): - cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] - elif len(dependencies) == 1: - cydependencies = (dependencies[0])._pvt_ptr - if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) - with nogil: - err = cydriver.cuGraphAddEmptyNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies) - if len(dependencies) > 1 and cydependencies is not NULL: - free(cydependencies) + cdef cydriver.CUgraph cyhGraph + cdef CUgraphNode phGraphNode + try: + phGraphNode = CUgraphNode() + if hGraph is None: + phGraph = 0 + elif isinstance(hGraph, (CUgraph,)): + phGraph = int(hGraph) + else: + phGraph = int(CUgraph(hGraph)) + cyhGraph = phGraph + dependencies = [] if dependencies is None else dependencies + if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): + raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") + if len(dependencies) > 1: + cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) + if cydependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) + else: + for idx in range(len(dependencies)): + cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr + if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) + with nogil: + err = cydriver.cuGraphAddEmptyNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies) + finally: + if len(dependencies) > 1 and cydependencies is not NULL: + free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, phGraphNode) @@ -44583,40 +44941,43 @@ def cuGraphAddEventRecordNode(hGraph, dependencies : Optional[tuple[CUgraphNode] :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuGraphAddEventWaitNode`, :py:obj:`~.cuEventRecordWithFlags`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphDestroyNode`, :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphAddMemsetNode` """ cdef cydriver.CUevent cyevent - if event is None: - pevent = 0 - elif isinstance(event, (CUevent,)): - pevent = int(event) - else: - pevent = int(CUevent(event)) - cyevent = pevent - dependencies = [] if dependencies is None else dependencies - if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): - raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") - cdef cydriver.CUgraph cyhGraph - if hGraph is None: - phGraph = 0 - elif isinstance(hGraph, (CUgraph,)): - phGraph = int(hGraph) - else: - phGraph = int(CUgraph(hGraph)) - cyhGraph = phGraph - cdef CUgraphNode phGraphNode = CUgraphNode() cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 1: - cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) - if cydependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) + cdef cydriver.CUgraph cyhGraph + cdef CUgraphNode phGraphNode + try: + phGraphNode = CUgraphNode() + if hGraph is None: + phGraph = 0 + elif isinstance(hGraph, (CUgraph,)): + phGraph = int(hGraph) + else: + phGraph = int(CUgraph(hGraph)) + cyhGraph = phGraph + dependencies = [] if dependencies is None else dependencies + if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): + raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") + if len(dependencies) > 1: + cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) + if cydependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) + else: + for idx in range(len(dependencies)): + cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr + if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) + if event is None: + pevent = 0 + elif isinstance(event, (CUevent,)): + pevent = int(event) else: - for idx in range(len(dependencies)): - cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] - elif len(dependencies) == 1: - cydependencies = (dependencies[0])._pvt_ptr - if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) - with nogil: - err = cydriver.cuGraphAddEventRecordNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cyevent) - if len(dependencies) > 1 and cydependencies is not NULL: - free(cydependencies) + pevent = int(CUevent(event)) + cyevent = pevent + with nogil: + err = cydriver.cuGraphAddEventRecordNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cyevent) + finally: + if len(dependencies) > 1 and cydependencies is not NULL: + free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, phGraphNode) @@ -44646,6 +45007,7 @@ def cuGraphEventRecordNodeGetEvent(hNode): -------- :py:obj:`~.cuGraphAddEventRecordNode`, :py:obj:`~.cuGraphEventRecordNodeSetEvent`, :py:obj:`~.cuGraphEventWaitNodeGetEvent`, :py:obj:`~.cuEventRecordWithFlags`, :py:obj:`~.cuStreamWaitEvent` """ + cdef CUevent event_out cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -44654,7 +45016,7 @@ def cuGraphEventRecordNodeGetEvent(hNode): else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef CUevent event_out = CUevent() + event_out = CUevent() with nogil: err = cydriver.cuGraphEventRecordNodeGetEvent(cyhNode, event_out._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -44687,13 +45049,6 @@ def cuGraphEventRecordNodeSetEvent(hNode, event): :py:obj:`~.cuGraphNodeSetParams`, :py:obj:`~.cuGraphAddEventRecordNode`, :py:obj:`~.cuGraphEventRecordNodeGetEvent`, :py:obj:`~.cuGraphEventWaitNodeSetEvent`, :py:obj:`~.cuEventRecordWithFlags`, :py:obj:`~.cuStreamWaitEvent` """ cdef cydriver.CUevent cyevent - if event is None: - pevent = 0 - elif isinstance(event, (CUevent,)): - pevent = int(event) - else: - pevent = int(CUevent(event)) - cyevent = pevent cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -44702,6 +45057,13 @@ def cuGraphEventRecordNodeSetEvent(hNode, event): else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode + if event is None: + pevent = 0 + elif isinstance(event, (CUevent,)): + pevent = int(event) + else: + pevent = int(CUevent(event)) + cyevent = pevent with nogil: err = cydriver.cuGraphEventRecordNodeSetEvent(cyhNode, cyevent) return (_CUresult(err),) @@ -44748,40 +45110,43 @@ def cuGraphAddEventWaitNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuGraphAddEventRecordNode`, :py:obj:`~.cuEventRecordWithFlags`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphDestroyNode`, :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphAddMemsetNode` """ cdef cydriver.CUevent cyevent - if event is None: - pevent = 0 - elif isinstance(event, (CUevent,)): - pevent = int(event) - else: - pevent = int(CUevent(event)) - cyevent = pevent - dependencies = [] if dependencies is None else dependencies - if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): - raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") - cdef cydriver.CUgraph cyhGraph - if hGraph is None: - phGraph = 0 - elif isinstance(hGraph, (CUgraph,)): - phGraph = int(hGraph) - else: - phGraph = int(CUgraph(hGraph)) - cyhGraph = phGraph - cdef CUgraphNode phGraphNode = CUgraphNode() cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 1: - cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) - if cydependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) + cdef cydriver.CUgraph cyhGraph + cdef CUgraphNode phGraphNode + try: + phGraphNode = CUgraphNode() + if hGraph is None: + phGraph = 0 + elif isinstance(hGraph, (CUgraph,)): + phGraph = int(hGraph) + else: + phGraph = int(CUgraph(hGraph)) + cyhGraph = phGraph + dependencies = [] if dependencies is None else dependencies + if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): + raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") + if len(dependencies) > 1: + cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) + if cydependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) + else: + for idx in range(len(dependencies)): + cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr + if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) + if event is None: + pevent = 0 + elif isinstance(event, (CUevent,)): + pevent = int(event) else: - for idx in range(len(dependencies)): - cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] - elif len(dependencies) == 1: - cydependencies = (dependencies[0])._pvt_ptr - if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) - with nogil: - err = cydriver.cuGraphAddEventWaitNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cyevent) - if len(dependencies) > 1 and cydependencies is not NULL: - free(cydependencies) + pevent = int(CUevent(event)) + cyevent = pevent + with nogil: + err = cydriver.cuGraphAddEventWaitNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cyevent) + finally: + if len(dependencies) > 1 and cydependencies is not NULL: + free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, phGraphNode) @@ -44811,6 +45176,7 @@ def cuGraphEventWaitNodeGetEvent(hNode): -------- :py:obj:`~.cuGraphAddEventWaitNode`, :py:obj:`~.cuGraphEventWaitNodeSetEvent`, :py:obj:`~.cuGraphEventRecordNodeGetEvent`, :py:obj:`~.cuEventRecordWithFlags`, :py:obj:`~.cuStreamWaitEvent` """ + cdef CUevent event_out cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -44819,7 +45185,7 @@ def cuGraphEventWaitNodeGetEvent(hNode): else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef CUevent event_out = CUevent() + event_out = CUevent() with nogil: err = cydriver.cuGraphEventWaitNodeGetEvent(cyhNode, event_out._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -44852,13 +45218,6 @@ def cuGraphEventWaitNodeSetEvent(hNode, event): :py:obj:`~.cuGraphNodeSetParams`, :py:obj:`~.cuGraphAddEventWaitNode`, :py:obj:`~.cuGraphEventWaitNodeGetEvent`, :py:obj:`~.cuGraphEventRecordNodeSetEvent`, :py:obj:`~.cuEventRecordWithFlags`, :py:obj:`~.cuStreamWaitEvent` """ cdef cydriver.CUevent cyevent - if event is None: - pevent = 0 - elif isinstance(event, (CUevent,)): - pevent = int(event) - else: - pevent = int(CUevent(event)) - cyevent = pevent cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -44867,6 +45226,13 @@ def cuGraphEventWaitNodeSetEvent(hNode, event): else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode + if event is None: + pevent = 0 + elif isinstance(event, (CUevent,)): + pevent = int(event) + else: + pevent = int(CUevent(event)) + cyevent = pevent with nogil: err = cydriver.cuGraphEventWaitNodeSetEvent(cyhNode, cyevent) return (_CUresult(err),) @@ -44911,34 +45277,38 @@ def cuGraphAddExternalSemaphoresSignalNode(hGraph, dependencies : Optional[tuple -------- :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuGraphExternalSemaphoresSignalNodeGetParams`, :py:obj:`~.cuGraphExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cuGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cuGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cuImportExternalSemaphore`, :py:obj:`~.cuSignalExternalSemaphoresAsync`, :py:obj:`~.cuWaitExternalSemaphoresAsync`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphDestroyNode`, :py:obj:`~.cuGraphAddEventRecordNode`, :py:obj:`~.cuGraphAddEventWaitNode`, :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphAddMemsetNode` """ - dependencies = [] if dependencies is None else dependencies - if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): - raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") - cdef cydriver.CUgraph cyhGraph - if hGraph is None: - phGraph = 0 - elif isinstance(hGraph, (CUgraph,)): - phGraph = int(hGraph) - else: - phGraph = int(CUgraph(hGraph)) - cyhGraph = phGraph - cdef CUgraphNode phGraphNode = CUgraphNode() + cdef cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* cynodeParams_ptr cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 1: - cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) - if cydependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) - else: - for idx in range(len(dependencies)): - cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] - elif len(dependencies) == 1: - cydependencies = (dependencies[0])._pvt_ptr - if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) - cdef cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL - with nogil: - err = cydriver.cuGraphAddExternalSemaphoresSignalNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cynodeParams_ptr) - if len(dependencies) > 1 and cydependencies is not NULL: - free(cydependencies) + cdef cydriver.CUgraph cyhGraph + cdef CUgraphNode phGraphNode + try: + phGraphNode = CUgraphNode() + if hGraph is None: + phGraph = 0 + elif isinstance(hGraph, (CUgraph,)): + phGraph = int(hGraph) + else: + phGraph = int(CUgraph(hGraph)) + cyhGraph = phGraph + dependencies = [] if dependencies is None else dependencies + if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): + raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") + if len(dependencies) > 1: + cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) + if cydependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) + else: + for idx in range(len(dependencies)): + cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr + if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + with nogil: + err = cydriver.cuGraphAddExternalSemaphoresSignalNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cynodeParams_ptr) + finally: + if len(dependencies) > 1 and cydependencies is not NULL: + free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, phGraphNode) @@ -44974,6 +45344,7 @@ def cuGraphExternalSemaphoresSignalNodeGetParams(hNode): -------- :py:obj:`~.cuLaunchKernel`, :py:obj:`~.cuGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cuGraphExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cuGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cuSignalExternalSemaphoresAsync`, :py:obj:`~.cuWaitExternalSemaphoresAsync` """ + cdef CUDA_EXT_SEM_SIGNAL_NODE_PARAMS params_out cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -44982,7 +45353,7 @@ def cuGraphExternalSemaphoresSignalNodeGetParams(hNode): else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef CUDA_EXT_SEM_SIGNAL_NODE_PARAMS params_out = CUDA_EXT_SEM_SIGNAL_NODE_PARAMS() + params_out = CUDA_EXT_SEM_SIGNAL_NODE_PARAMS() with nogil: err = cydriver.cuGraphExternalSemaphoresSignalNodeGetParams(cyhNode, params_out._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -45015,6 +45386,7 @@ def cuGraphExternalSemaphoresSignalNodeSetParams(hNode, nodeParams : Optional[CU -------- :py:obj:`~.cuGraphNodeSetParams`, :py:obj:`~.cuGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cuGraphExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cuGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cuSignalExternalSemaphoresAsync`, :py:obj:`~.cuWaitExternalSemaphoresAsync` """ + cdef cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* cynodeParams_ptr cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -45023,7 +45395,7 @@ def cuGraphExternalSemaphoresSignalNodeSetParams(hNode, nodeParams : Optional[CU else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL with nogil: err = cydriver.cuGraphExternalSemaphoresSignalNodeSetParams(cyhNode, cynodeParams_ptr) return (_CUresult(err),) @@ -45068,34 +45440,38 @@ def cuGraphAddExternalSemaphoresWaitNode(hGraph, dependencies : Optional[tuple[C -------- :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuGraphExternalSemaphoresWaitNodeGetParams`, :py:obj:`~.cuGraphExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cuGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cuGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cuImportExternalSemaphore`, :py:obj:`~.cuSignalExternalSemaphoresAsync`, :py:obj:`~.cuWaitExternalSemaphoresAsync`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphDestroyNode`, :py:obj:`~.cuGraphAddEventRecordNode`, :py:obj:`~.cuGraphAddEventWaitNode`, :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphAddMemsetNode` """ - dependencies = [] if dependencies is None else dependencies - if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): - raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") - cdef cydriver.CUgraph cyhGraph - if hGraph is None: - phGraph = 0 - elif isinstance(hGraph, (CUgraph,)): - phGraph = int(hGraph) - else: - phGraph = int(CUgraph(hGraph)) - cyhGraph = phGraph - cdef CUgraphNode phGraphNode = CUgraphNode() + cdef cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS* cynodeParams_ptr cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 1: - cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) - if cydependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) - else: - for idx in range(len(dependencies)): - cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] - elif len(dependencies) == 1: - cydependencies = (dependencies[0])._pvt_ptr - if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) - cdef cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL - with nogil: - err = cydriver.cuGraphAddExternalSemaphoresWaitNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cynodeParams_ptr) - if len(dependencies) > 1 and cydependencies is not NULL: - free(cydependencies) + cdef cydriver.CUgraph cyhGraph + cdef CUgraphNode phGraphNode + try: + phGraphNode = CUgraphNode() + if hGraph is None: + phGraph = 0 + elif isinstance(hGraph, (CUgraph,)): + phGraph = int(hGraph) + else: + phGraph = int(CUgraph(hGraph)) + cyhGraph = phGraph + dependencies = [] if dependencies is None else dependencies + if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): + raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") + if len(dependencies) > 1: + cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) + if cydependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) + else: + for idx in range(len(dependencies)): + cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr + if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + with nogil: + err = cydriver.cuGraphAddExternalSemaphoresWaitNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cynodeParams_ptr) + finally: + if len(dependencies) > 1 and cydependencies is not NULL: + free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, phGraphNode) @@ -45131,6 +45507,7 @@ def cuGraphExternalSemaphoresWaitNodeGetParams(hNode): -------- :py:obj:`~.cuLaunchKernel`, :py:obj:`~.cuGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cuGraphExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cuGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cuSignalExternalSemaphoresAsync`, :py:obj:`~.cuWaitExternalSemaphoresAsync` """ + cdef CUDA_EXT_SEM_WAIT_NODE_PARAMS params_out cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -45139,7 +45516,7 @@ def cuGraphExternalSemaphoresWaitNodeGetParams(hNode): else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef CUDA_EXT_SEM_WAIT_NODE_PARAMS params_out = CUDA_EXT_SEM_WAIT_NODE_PARAMS() + params_out = CUDA_EXT_SEM_WAIT_NODE_PARAMS() with nogil: err = cydriver.cuGraphExternalSemaphoresWaitNodeGetParams(cyhNode, params_out._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -45172,6 +45549,7 @@ def cuGraphExternalSemaphoresWaitNodeSetParams(hNode, nodeParams : Optional[CUDA -------- :py:obj:`~.cuGraphNodeSetParams`, :py:obj:`~.cuGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cuGraphExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cuGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cuSignalExternalSemaphoresAsync`, :py:obj:`~.cuWaitExternalSemaphoresAsync` """ + cdef cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS* cynodeParams_ptr cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -45180,7 +45558,7 @@ def cuGraphExternalSemaphoresWaitNodeSetParams(hNode, nodeParams : Optional[CUDA else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL with nogil: err = cydriver.cuGraphExternalSemaphoresWaitNodeSetParams(cyhNode, cynodeParams_ptr) return (_CUresult(err),) @@ -45228,34 +45606,38 @@ def cuGraphAddBatchMemOpNode(hGraph, dependencies : Optional[tuple[CUgraphNode] ----- Warning: Improper use of this API may deadlock the application. Synchronization ordering established through this API is not visible to CUDA. CUDA tasks that are (even indirectly) ordered by this API should also have that order expressed with CUDA-visible dependencies such as events. This ensures that the scheduler does not serialize them in an improper order. """ - dependencies = [] if dependencies is None else dependencies - if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): - raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") - cdef cydriver.CUgraph cyhGraph - if hGraph is None: - phGraph = 0 - elif isinstance(hGraph, (CUgraph,)): - phGraph = int(hGraph) - else: - phGraph = int(CUgraph(hGraph)) - cyhGraph = phGraph - cdef CUgraphNode phGraphNode = CUgraphNode() + cdef cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS* cynodeParams_ptr cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 1: - cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) - if cydependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) - else: - for idx in range(len(dependencies)): - cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] - elif len(dependencies) == 1: - cydependencies = (dependencies[0])._pvt_ptr - if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) - cdef cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL - with nogil: - err = cydriver.cuGraphAddBatchMemOpNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cynodeParams_ptr) - if len(dependencies) > 1 and cydependencies is not NULL: - free(cydependencies) + cdef cydriver.CUgraph cyhGraph + cdef CUgraphNode phGraphNode + try: + phGraphNode = CUgraphNode() + if hGraph is None: + phGraph = 0 + elif isinstance(hGraph, (CUgraph,)): + phGraph = int(hGraph) + else: + phGraph = int(CUgraph(hGraph)) + cyhGraph = phGraph + dependencies = [] if dependencies is None else dependencies + if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): + raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") + if len(dependencies) > 1: + cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) + if cydependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) + else: + for idx in range(len(dependencies)): + cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr + if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + with nogil: + err = cydriver.cuGraphAddBatchMemOpNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cynodeParams_ptr) + finally: + if len(dependencies) > 1 and cydependencies is not NULL: + free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, phGraphNode) @@ -45290,6 +45672,7 @@ def cuGraphBatchMemOpNodeGetParams(hNode): -------- :py:obj:`~.cuStreamBatchMemOp`, :py:obj:`~.cuGraphAddBatchMemOpNode`, :py:obj:`~.cuGraphBatchMemOpNodeSetParams` """ + cdef CUDA_BATCH_MEM_OP_NODE_PARAMS nodeParams_out cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -45298,7 +45681,7 @@ def cuGraphBatchMemOpNodeGetParams(hNode): else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef CUDA_BATCH_MEM_OP_NODE_PARAMS nodeParams_out = CUDA_BATCH_MEM_OP_NODE_PARAMS() + nodeParams_out = CUDA_BATCH_MEM_OP_NODE_PARAMS() with nogil: err = cydriver.cuGraphBatchMemOpNodeGetParams(cyhNode, nodeParams_out._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -45333,6 +45716,7 @@ def cuGraphBatchMemOpNodeSetParams(hNode, nodeParams : Optional[CUDA_BATCH_MEM_O -------- :py:obj:`~.cuGraphNodeSetParams`, :py:obj:`~.cuStreamBatchMemOp`, :py:obj:`~.cuGraphAddBatchMemOpNode`, :py:obj:`~.cuGraphBatchMemOpNodeGetParams` """ + cdef cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS* cynodeParams_ptr cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -45341,7 +45725,7 @@ def cuGraphBatchMemOpNodeSetParams(hNode, nodeParams : Optional[CUDA_BATCH_MEM_O else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL with nogil: err = cydriver.cuGraphBatchMemOpNodeSetParams(cyhNode, cynodeParams_ptr) return (_CUresult(err),) @@ -45396,14 +45780,8 @@ def cuGraphExecBatchMemOpNodeSetParams(hGraphExec, hNode, nodeParams : Optional[ -------- :py:obj:`~.cuGraphExecNodeSetParams`, :py:obj:`~.cuStreamBatchMemOp`, :py:obj:`~.cuGraphAddBatchMemOpNode`, :py:obj:`~.cuGraphBatchMemOpNodeGetParams`, :py:obj:`~.cuGraphBatchMemOpNodeSetParams`, :py:obj:`~.cuGraphInstantiate` """ + cdef cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS* cynodeParams_ptr cdef cydriver.CUgraphNode cyhNode - if hNode is None: - phNode = 0 - elif isinstance(hNode, (CUgraphNode,)): - phNode = int(hNode) - else: - phNode = int(CUgraphNode(hNode)) - cyhNode = phNode cdef cydriver.CUgraphExec cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -45412,7 +45790,14 @@ def cuGraphExecBatchMemOpNodeSetParams(hGraphExec, hNode, nodeParams : Optional[ else: phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec - cdef cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + if hNode is None: + phNode = 0 + elif isinstance(hNode, (CUgraphNode,)): + phNode = int(hNode) + else: + phNode = int(CUgraphNode(hNode)) + cyhNode = phNode + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL with nogil: err = cydriver.cuGraphExecBatchMemOpNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr) return (_CUresult(err),) @@ -45497,34 +45882,38 @@ def cuGraphAddMemAllocNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | -------- :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuGraphAddMemFreeNode`, :py:obj:`~.cuGraphMemAllocNodeGetParams`, :py:obj:`~.cuDeviceGraphMemTrim`, :py:obj:`~.cuDeviceGetGraphMemAttribute`, :py:obj:`~.cuDeviceSetGraphMemAttribute`, :py:obj:`~.cuMemAllocAsync`, :py:obj:`~.cuMemFreeAsync`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphDestroyNode`, :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddEventRecordNode`, :py:obj:`~.cuGraphAddEventWaitNode`, :py:obj:`~.cuGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cuGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphAddMemsetNode` """ - dependencies = [] if dependencies is None else dependencies - if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): - raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") - cdef cydriver.CUgraph cyhGraph - if hGraph is None: - phGraph = 0 - elif isinstance(hGraph, (CUgraph,)): - phGraph = int(hGraph) - else: - phGraph = int(CUgraph(hGraph)) - cyhGraph = phGraph - cdef CUgraphNode phGraphNode = CUgraphNode() + cdef cydriver.CUDA_MEM_ALLOC_NODE_PARAMS* cynodeParams_ptr cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 1: - cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) - if cydependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) - else: - for idx in range(len(dependencies)): - cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] - elif len(dependencies) == 1: - cydependencies = (dependencies[0])._pvt_ptr - if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) - cdef cydriver.CUDA_MEM_ALLOC_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL - with nogil: - err = cydriver.cuGraphAddMemAllocNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cynodeParams_ptr) - if len(dependencies) > 1 and cydependencies is not NULL: - free(cydependencies) + cdef cydriver.CUgraph cyhGraph + cdef CUgraphNode phGraphNode + try: + phGraphNode = CUgraphNode() + if hGraph is None: + phGraph = 0 + elif isinstance(hGraph, (CUgraph,)): + phGraph = int(hGraph) + else: + phGraph = int(CUgraph(hGraph)) + cyhGraph = phGraph + dependencies = [] if dependencies is None else dependencies + if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): + raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") + if len(dependencies) > 1: + cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) + if cydependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) + else: + for idx in range(len(dependencies)): + cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr + if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + with nogil: + err = cydriver.cuGraphAddMemAllocNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cynodeParams_ptr) + finally: + if len(dependencies) > 1 and cydependencies is not NULL: + free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, phGraphNode) @@ -45557,6 +45946,7 @@ def cuGraphMemAllocNodeGetParams(hNode): -------- :py:obj:`~.cuGraphAddMemAllocNode`, :py:obj:`~.cuGraphMemFreeNodeGetParams` """ + cdef CUDA_MEM_ALLOC_NODE_PARAMS params_out cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -45565,7 +45955,7 @@ def cuGraphMemAllocNodeGetParams(hNode): else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef CUDA_MEM_ALLOC_NODE_PARAMS params_out = CUDA_MEM_ALLOC_NODE_PARAMS() + params_out = CUDA_MEM_ALLOC_NODE_PARAMS() with nogil: err = cydriver.cuGraphMemAllocNodeGetParams(cyhNode, params_out._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -45630,40 +46020,43 @@ def cuGraphAddMemFreeNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | l :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuGraphAddMemAllocNode`, :py:obj:`~.cuGraphMemFreeNodeGetParams`, :py:obj:`~.cuDeviceGraphMemTrim`, :py:obj:`~.cuDeviceGetGraphMemAttribute`, :py:obj:`~.cuDeviceSetGraphMemAttribute`, :py:obj:`~.cuMemAllocAsync`, :py:obj:`~.cuMemFreeAsync`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphDestroyNode`, :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddEmptyNode`, :py:obj:`~.cuGraphAddEventRecordNode`, :py:obj:`~.cuGraphAddEventWaitNode`, :py:obj:`~.cuGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cuGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphAddMemsetNode` """ cdef cydriver.CUdeviceptr cydptr - if dptr is None: - pdptr = 0 - elif isinstance(dptr, (CUdeviceptr,)): - pdptr = int(dptr) - else: - pdptr = int(CUdeviceptr(dptr)) - cydptr = pdptr - dependencies = [] if dependencies is None else dependencies - if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): - raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") - cdef cydriver.CUgraph cyhGraph - if hGraph is None: - phGraph = 0 - elif isinstance(hGraph, (CUgraph,)): - phGraph = int(hGraph) - else: - phGraph = int(CUgraph(hGraph)) - cyhGraph = phGraph - cdef CUgraphNode phGraphNode = CUgraphNode() cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 1: - cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) - if cydependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) + cdef cydriver.CUgraph cyhGraph + cdef CUgraphNode phGraphNode + try: + phGraphNode = CUgraphNode() + if hGraph is None: + phGraph = 0 + elif isinstance(hGraph, (CUgraph,)): + phGraph = int(hGraph) + else: + phGraph = int(CUgraph(hGraph)) + cyhGraph = phGraph + dependencies = [] if dependencies is None else dependencies + if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): + raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") + if len(dependencies) > 1: + cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) + if cydependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) + else: + for idx in range(len(dependencies)): + cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr + if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) + if dptr is None: + pdptr = 0 + elif isinstance(dptr, (CUdeviceptr,)): + pdptr = int(dptr) else: - for idx in range(len(dependencies)): - cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] - elif len(dependencies) == 1: - cydependencies = (dependencies[0])._pvt_ptr - if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) - with nogil: - err = cydriver.cuGraphAddMemFreeNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cydptr) - if len(dependencies) > 1 and cydependencies is not NULL: - free(cydependencies) + pdptr = int(CUdeviceptr(dptr)) + cydptr = pdptr + with nogil: + err = cydriver.cuGraphAddMemFreeNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, numDependencies, cydptr) + finally: + if len(dependencies) > 1 and cydependencies is not NULL: + free(cydependencies) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, phGraphNode) @@ -45693,6 +46086,7 @@ def cuGraphMemFreeNodeGetParams(hNode): -------- :py:obj:`~.cuGraphAddMemFreeNode`, :py:obj:`~.cuGraphMemAllocNodeGetParams` """ + cdef CUdeviceptr dptr_out cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -45701,7 +46095,7 @@ def cuGraphMemFreeNodeGetParams(hNode): else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef CUdeviceptr dptr_out = CUdeviceptr() + dptr_out = CUdeviceptr() with nogil: err = cydriver.cuGraphMemFreeNodeGetParams(cyhNode, dptr_out._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -45787,6 +46181,9 @@ def cuDeviceGetGraphMemAttribute(device, attr not None : CUgraphMem_attribute): -------- :py:obj:`~.cuDeviceSetGraphMemAttribute`, :py:obj:`~.cuGraphAddMemAllocNode`, :py:obj:`~.cuGraphAddMemFreeNode` """ + cdef _HelperCUgraphMem_attribute cyvalue + cdef void* cyvalue_ptr + cdef cydriver.CUgraphMem_attribute cyattr cdef cydriver.CUdevice cydevice if device is None: pdevice = 0 @@ -45795,9 +46192,9 @@ def cuDeviceGetGraphMemAttribute(device, attr not None : CUgraphMem_attribute): else: pdevice = int(CUdevice(device)) cydevice = pdevice - cdef cydriver.CUgraphMem_attribute cyattr = int(attr) - cdef _HelperCUgraphMem_attribute cyvalue = _HelperCUgraphMem_attribute(attr, 0, is_getter=True) - cdef void* cyvalue_ptr = cyvalue.cptr + cyattr = int(attr) + cyvalue = _HelperCUgraphMem_attribute(attr, 0, is_getter=True) + cyvalue_ptr = cyvalue.cptr with nogil: err = cydriver.cuDeviceGetGraphMemAttribute(cydevice, cyattr, cyvalue_ptr) if err != cydriver.CUDA_SUCCESS: @@ -45839,6 +46236,9 @@ def cuDeviceSetGraphMemAttribute(device, attr not None : CUgraphMem_attribute, v -------- :py:obj:`~.cuDeviceGetGraphMemAttribute`, :py:obj:`~.cuGraphAddMemAllocNode`, :py:obj:`~.cuGraphAddMemFreeNode` """ + cdef _HelperCUgraphMem_attribute cyvalue + cdef void* cyvalue_ptr + cdef cydriver.CUgraphMem_attribute cyattr cdef cydriver.CUdevice cydevice if device is None: pdevice = 0 @@ -45847,9 +46247,9 @@ def cuDeviceSetGraphMemAttribute(device, attr not None : CUgraphMem_attribute, v else: pdevice = int(CUdevice(device)) cydevice = pdevice - cdef cydriver.CUgraphMem_attribute cyattr = int(attr) - cdef _HelperCUgraphMem_attribute cyvalue = _HelperCUgraphMem_attribute(attr, value, is_getter=False) - cdef void* cyvalue_ptr = cyvalue.cptr + cyattr = int(attr) + cyvalue = _HelperCUgraphMem_attribute(attr, value, is_getter=False) + cyvalue_ptr = cyvalue.cptr with nogil: err = cydriver.cuDeviceSetGraphMemAttribute(cydevice, cyattr, cyvalue_ptr) return (_CUresult(err),) @@ -45890,6 +46290,8 @@ def cuGraphClone(originalGraph): : Cloning is not supported for graphs which contain memory allocation nodes, memory free nodes, or conditional nodes. """ cdef cydriver.CUgraph cyoriginalGraph + cdef CUgraph phGraphClone + phGraphClone = CUgraph() if originalGraph is None: poriginalGraph = 0 elif isinstance(originalGraph, (CUgraph,)): @@ -45897,7 +46299,6 @@ def cuGraphClone(originalGraph): else: poriginalGraph = int(CUgraph(originalGraph)) cyoriginalGraph = poriginalGraph - cdef CUgraph phGraphClone = CUgraph() with nogil: err = cydriver.cuGraphClone(phGraphClone._pvt_ptr, cyoriginalGraph) if err != cydriver.CUDA_SUCCESS: @@ -45939,14 +46340,9 @@ def cuGraphNodeFindInClone(hOriginalNode, hClonedGraph): :py:obj:`~.cuGraphClone` """ cdef cydriver.CUgraph cyhClonedGraph - if hClonedGraph is None: - phClonedGraph = 0 - elif isinstance(hClonedGraph, (CUgraph,)): - phClonedGraph = int(hClonedGraph) - else: - phClonedGraph = int(CUgraph(hClonedGraph)) - cyhClonedGraph = phClonedGraph cdef cydriver.CUgraphNode cyhOriginalNode + cdef CUgraphNode phNode + phNode = CUgraphNode() if hOriginalNode is None: phOriginalNode = 0 elif isinstance(hOriginalNode, (CUgraphNode,)): @@ -45954,7 +46350,13 @@ def cuGraphNodeFindInClone(hOriginalNode, hClonedGraph): else: phOriginalNode = int(CUgraphNode(hOriginalNode)) cyhOriginalNode = phOriginalNode - cdef CUgraphNode phNode = CUgraphNode() + if hClonedGraph is None: + phClonedGraph = 0 + elif isinstance(hClonedGraph, (CUgraph,)): + phClonedGraph = int(hClonedGraph) + else: + phClonedGraph = int(CUgraph(hClonedGraph)) + cyhClonedGraph = phClonedGraph with nogil: err = cydriver.cuGraphNodeFindInClone(phNode._pvt_ptr, cyhOriginalNode, cyhClonedGraph) if err != cydriver.CUDA_SUCCESS: @@ -45986,6 +46388,7 @@ def cuGraphNodeGetType(hNode): -------- :py:obj:`~.cuGraphGetNodes`, :py:obj:`~.cuGraphGetRootNodes`, :py:obj:`~.cuGraphChildGraphNodeGetGraph`, :py:obj:`~.cuGraphKernelNodeGetParams`, :py:obj:`~.cuGraphKernelNodeSetParams`, :py:obj:`~.cuGraphHostNodeGetParams`, :py:obj:`~.cuGraphHostNodeSetParams`, :py:obj:`~.cuGraphMemcpyNodeGetParams`, :py:obj:`~.cuGraphMemcpyNodeSetParams`, :py:obj:`~.cuGraphMemsetNodeGetParams`, :py:obj:`~.cuGraphMemsetNodeSetParams` """ + cdef cydriver.CUgraphNodeType typename cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -45994,7 +46397,6 @@ def cuGraphNodeGetType(hNode): else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef cydriver.CUgraphNodeType typename with nogil: err = cydriver.cuGraphNodeGetType(cyhNode, &typename) if err != cydriver.CUDA_SUCCESS: @@ -46027,6 +46429,7 @@ def cuGraphNodeGetContainingGraph(hNode): -------- :py:obj:`~.cuGraphGetNodes`, :py:obj:`~.cuGraphDebugDotPrint` :py:obj:`~.cuGraphNodeGetLocalId` :py:obj:`~.cuGraphNodeGetToolsId` :py:obj:`~.cuGraphGetId` :py:obj:`~.cuGraphExecGetId` """ + cdef CUgraph phGraph cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -46035,7 +46438,7 @@ def cuGraphNodeGetContainingGraph(hNode): else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef CUgraph phGraph = CUgraph() + phGraph = CUgraph() with nogil: err = cydriver.cuGraphNodeGetContainingGraph(cyhNode, phGraph._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -46069,6 +46472,7 @@ def cuGraphNodeGetLocalId(hNode): -------- :py:obj:`~.cuGraphGetNodes`, :py:obj:`~.cuGraphDebugDotPrint` :py:obj:`~.cuGraphNodeGetContainingGraph` :py:obj:`~.cuGraphNodeGetToolsId` :py:obj:`~.cuGraphGetId` :py:obj:`~.cuGraphExecGetId` """ + cdef unsigned int nodeId = 0 cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -46077,7 +46481,6 @@ def cuGraphNodeGetLocalId(hNode): else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef unsigned int nodeId = 0 with nogil: err = cydriver.cuGraphNodeGetLocalId(cyhNode, &nodeId) if err != cydriver.CUDA_SUCCESS: @@ -46107,6 +46510,7 @@ def cuGraphNodeGetToolsId(hNode): -------- :py:obj:`~.cuGraphGetNodes`, :py:obj:`~.cuGraphDebugDotPrint` :py:obj:`~.cuGraphNodeGetContainingGraph` :py:obj:`~.cuGraphNodeGetLocalId` :py:obj:`~.cuGraphGetId` :py:obj:`~.cuGraphExecGetId` """ + cdef unsigned long long toolsNodeId = 0 cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -46115,7 +46519,6 @@ def cuGraphNodeGetToolsId(hNode): else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef unsigned long long toolsNodeId = 0 with nogil: err = cydriver.cuGraphNodeGetToolsId(cyhNode, &toolsNodeId) if err != cydriver.CUDA_SUCCESS: @@ -46148,6 +46551,7 @@ def cuGraphGetId(hGraph): -------- :py:obj:`~.cuGraphGetNodes`, :py:obj:`~.cuGraphDebugDotPrint` :py:obj:`~.cuGraphNodeGetContainingGraph` :py:obj:`~.cuGraphNodeGetLocalId` :py:obj:`~.cuGraphNodeGetToolsId` :py:obj:`~.cuGraphExecGetId` """ + cdef unsigned int graphId = 0 cdef cydriver.CUgraph cyhGraph if hGraph is None: phGraph = 0 @@ -46156,7 +46560,6 @@ def cuGraphGetId(hGraph): else: phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph - cdef unsigned int graphId = 0 with nogil: err = cydriver.cuGraphGetId(cyhGraph, &graphId) if err != cydriver.CUDA_SUCCESS: @@ -46189,6 +46592,7 @@ def cuGraphExecGetId(hGraphExec): -------- :py:obj:`~.cuGraphGetNodes`, :py:obj:`~.cuGraphDebugDotPrint` :py:obj:`~.cuGraphNodeGetContainingGraph` :py:obj:`~.cuGraphNodeGetLocalId` :py:obj:`~.cuGraphNodeGetToolsId` :py:obj:`~.cuGraphGetId` """ + cdef unsigned int graphId = 0 cdef cydriver.CUgraphExec cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -46197,7 +46601,6 @@ def cuGraphExecGetId(hGraphExec): else: phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec - cdef unsigned int graphId = 0 with nogil: err = cydriver.cuGraphExecGetId(cyhGraphExec, &graphId) if err != cydriver.CUDA_SUCCESS: @@ -46239,26 +46642,28 @@ def cuGraphGetNodes(hGraph, size_t numNodes = 0): :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphGetRootNodes`, :py:obj:`~.cuGraphGetEdges`, :py:obj:`~.cuGraphNodeGetType`, :py:obj:`~.cuGraphNodeGetDependencies`, :py:obj:`~.cuGraphNodeGetDependentNodes` """ cdef size_t _graph_length = numNodes - cdef cydriver.CUgraph cyhGraph - if hGraph is None: - phGraph = 0 - elif isinstance(hGraph, (CUgraph,)): - phGraph = int(hGraph) - else: - phGraph = int(CUgraph(hGraph)) - cyhGraph = phGraph cdef cydriver.CUgraphNode* cynodes = NULL pynodes = [] - if _graph_length != 0: - cynodes = calloc(_graph_length, sizeof(cydriver.CUgraphNode)) - if cynodes is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode))) - with nogil: - err = cydriver.cuGraphGetNodes(cyhGraph, cynodes, &numNodes) - if CUresult(err) == CUresult(0): - pynodes = [CUgraphNode(init_value=cynodes[idx]) for idx in range(_graph_length)] - if cynodes is not NULL: - free(cynodes) + cdef cydriver.CUgraph cyhGraph + try: + if hGraph is None: + phGraph = 0 + elif isinstance(hGraph, (CUgraph,)): + phGraph = int(hGraph) + else: + phGraph = int(CUgraph(hGraph)) + cyhGraph = phGraph + if _graph_length != 0: + cynodes = calloc(_graph_length, sizeof(cydriver.CUgraphNode)) + if cynodes is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode))) + with nogil: + err = cydriver.cuGraphGetNodes(cyhGraph, cynodes, &numNodes) + finally: + if CUresult(err) == CUresult(0): + pynodes = [CUgraphNode(init_value=cynodes[idx]) for idx in range(_graph_length)] + if cynodes is not NULL: + free(cynodes) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None, None) return (_CUresult_SUCCESS, pynodes, numNodes) @@ -46298,26 +46703,28 @@ def cuGraphGetRootNodes(hGraph, size_t numRootNodes = 0): :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphGetNodes`, :py:obj:`~.cuGraphGetEdges`, :py:obj:`~.cuGraphNodeGetType`, :py:obj:`~.cuGraphNodeGetDependencies`, :py:obj:`~.cuGraphNodeGetDependentNodes` """ cdef size_t _graph_length = numRootNodes - cdef cydriver.CUgraph cyhGraph - if hGraph is None: - phGraph = 0 - elif isinstance(hGraph, (CUgraph,)): - phGraph = int(hGraph) - else: - phGraph = int(CUgraph(hGraph)) - cyhGraph = phGraph cdef cydriver.CUgraphNode* cyrootNodes = NULL pyrootNodes = [] - if _graph_length != 0: - cyrootNodes = calloc(_graph_length, sizeof(cydriver.CUgraphNode)) - if cyrootNodes is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode))) - with nogil: - err = cydriver.cuGraphGetRootNodes(cyhGraph, cyrootNodes, &numRootNodes) - if CUresult(err) == CUresult(0): - pyrootNodes = [CUgraphNode(init_value=cyrootNodes[idx]) for idx in range(_graph_length)] - if cyrootNodes is not NULL: - free(cyrootNodes) + cdef cydriver.CUgraph cyhGraph + try: + if hGraph is None: + phGraph = 0 + elif isinstance(hGraph, (CUgraph,)): + phGraph = int(hGraph) + else: + phGraph = int(CUgraph(hGraph)) + cyhGraph = phGraph + if _graph_length != 0: + cyrootNodes = calloc(_graph_length, sizeof(cydriver.CUgraphNode)) + if cyrootNodes is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode))) + with nogil: + err = cydriver.cuGraphGetRootNodes(cyhGraph, cyrootNodes, &numRootNodes) + finally: + if CUresult(err) == CUresult(0): + pyrootNodes = [CUgraphNode(init_value=cyrootNodes[idx]) for idx in range(_graph_length)] + if cyrootNodes is not NULL: + free(cyrootNodes) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None, None) return (_CUresult_SUCCESS, pyrootNodes, numRootNodes) @@ -46368,46 +46775,48 @@ def cuGraphGetEdges(hGraph, size_t numEdges = 0): :py:obj:`~.cuGraphGetNodes`, :py:obj:`~.cuGraphGetRootNodes`, :py:obj:`~.cuGraphAddDependencies`, :py:obj:`~.cuGraphRemoveDependencies`, :py:obj:`~.cuGraphNodeGetDependencies`, :py:obj:`~.cuGraphNodeGetDependentNodes` """ cdef size_t _graph_length = numEdges - cdef cydriver.CUgraph cyhGraph - if hGraph is None: - phGraph = 0 - elif isinstance(hGraph, (CUgraph,)): - phGraph = int(hGraph) - else: - phGraph = int(CUgraph(hGraph)) - cyhGraph = phGraph - cdef cydriver.CUgraphNode* cyfrom_ = NULL - pyfrom_ = [] - if _graph_length != 0: - cyfrom_ = calloc(_graph_length, sizeof(cydriver.CUgraphNode)) - if cyfrom_ is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode))) - cdef cydriver.CUgraphNode* cyto = NULL - pyto = [] - if _graph_length != 0: - cyto = calloc(_graph_length, sizeof(cydriver.CUgraphNode)) - if cyto is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode))) cdef cydriver.CUgraphEdgeData* cyedgeData = NULL pyedgeData = [] - if _graph_length != 0: - cyedgeData = calloc(_graph_length, sizeof(cydriver.CUgraphEdgeData)) - if cyedgeData is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphEdgeData))) - with nogil: - err = cydriver.cuGraphGetEdges(cyhGraph, cyfrom_, cyto, cyedgeData, &numEdges) - if CUresult(err) == CUresult(0): - pyfrom_ = [CUgraphNode(init_value=cyfrom_[idx]) for idx in range(_graph_length)] - if cyfrom_ is not NULL: - free(cyfrom_) - if CUresult(err) == CUresult(0): - pyto = [CUgraphNode(init_value=cyto[idx]) for idx in range(_graph_length)] - if cyto is not NULL: - free(cyto) - if CUresult(err) == CUresult(0): - pyedgeData = [CUgraphEdgeData(_ptr=&cyedgeData[idx]) for idx in range(_graph_length)] - if cyedgeData is not NULL: - free(cyedgeData) + cdef cydriver.CUgraphNode* cyto = NULL + pyto = [] + cdef cydriver.CUgraphNode* cyfrom_ = NULL + pyfrom_ = [] + cdef cydriver.CUgraph cyhGraph + try: + if hGraph is None: + phGraph = 0 + elif isinstance(hGraph, (CUgraph,)): + phGraph = int(hGraph) + else: + phGraph = int(CUgraph(hGraph)) + cyhGraph = phGraph + if _graph_length != 0: + cyfrom_ = calloc(_graph_length, sizeof(cydriver.CUgraphNode)) + if cyfrom_ is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode))) + if _graph_length != 0: + cyto = calloc(_graph_length, sizeof(cydriver.CUgraphNode)) + if cyto is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode))) + if _graph_length != 0: + cyedgeData = calloc(_graph_length, sizeof(cydriver.CUgraphEdgeData)) + if cyedgeData is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphEdgeData))) + with nogil: + err = cydriver.cuGraphGetEdges(cyhGraph, cyfrom_, cyto, cyedgeData, &numEdges) + finally: + if CUresult(err) == CUresult(0): + pyfrom_ = [CUgraphNode(init_value=cyfrom_[idx]) for idx in range(_graph_length)] + if cyfrom_ is not NULL: + free(cyfrom_) + if CUresult(err) == CUresult(0): + pyto = [CUgraphNode(init_value=cyto[idx]) for idx in range(_graph_length)] + if cyto is not NULL: + free(cyto) + if CUresult(err) == CUresult(0): + pyedgeData = [CUgraphEdgeData(_ptr=&cyedgeData[idx]) for idx in range(_graph_length)] + if cyedgeData is not NULL: + free(cyedgeData) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None, None, None, None) return (_CUresult_SUCCESS, pyfrom_, pyto, pyedgeData, numEdges) @@ -46455,36 +46864,38 @@ def cuGraphNodeGetDependencies(hNode, size_t numDependencies = 0): :py:obj:`~.cuGraphNodeGetDependentNodes`, :py:obj:`~.cuGraphGetNodes`, :py:obj:`~.cuGraphGetRootNodes`, :py:obj:`~.cuGraphGetEdges`, :py:obj:`~.cuGraphAddDependencies`, :py:obj:`~.cuGraphRemoveDependencies` """ cdef size_t _graph_length = numDependencies - cdef cydriver.CUgraphNode cyhNode - if hNode is None: - phNode = 0 - elif isinstance(hNode, (CUgraphNode,)): - phNode = int(hNode) - else: - phNode = int(CUgraphNode(hNode)) - cyhNode = phNode - cdef cydriver.CUgraphNode* cydependencies = NULL - pydependencies = [] - if _graph_length != 0: - cydependencies = calloc(_graph_length, sizeof(cydriver.CUgraphNode)) - if cydependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode))) cdef cydriver.CUgraphEdgeData* cyedgeData = NULL pyedgeData = [] - if _graph_length != 0: - cyedgeData = calloc(_graph_length, sizeof(cydriver.CUgraphEdgeData)) - if cyedgeData is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphEdgeData))) - with nogil: - err = cydriver.cuGraphNodeGetDependencies(cyhNode, cydependencies, cyedgeData, &numDependencies) - if CUresult(err) == CUresult(0): - pydependencies = [CUgraphNode(init_value=cydependencies[idx]) for idx in range(_graph_length)] - if cydependencies is not NULL: - free(cydependencies) - if CUresult(err) == CUresult(0): - pyedgeData = [CUgraphEdgeData(_ptr=&cyedgeData[idx]) for idx in range(_graph_length)] - if cyedgeData is not NULL: - free(cyedgeData) + cdef cydriver.CUgraphNode* cydependencies = NULL + pydependencies = [] + cdef cydriver.CUgraphNode cyhNode + try: + if hNode is None: + phNode = 0 + elif isinstance(hNode, (CUgraphNode,)): + phNode = int(hNode) + else: + phNode = int(CUgraphNode(hNode)) + cyhNode = phNode + if _graph_length != 0: + cydependencies = calloc(_graph_length, sizeof(cydriver.CUgraphNode)) + if cydependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode))) + if _graph_length != 0: + cyedgeData = calloc(_graph_length, sizeof(cydriver.CUgraphEdgeData)) + if cyedgeData is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphEdgeData))) + with nogil: + err = cydriver.cuGraphNodeGetDependencies(cyhNode, cydependencies, cyedgeData, &numDependencies) + finally: + if CUresult(err) == CUresult(0): + pydependencies = [CUgraphNode(init_value=cydependencies[idx]) for idx in range(_graph_length)] + if cydependencies is not NULL: + free(cydependencies) + if CUresult(err) == CUresult(0): + pyedgeData = [CUgraphEdgeData(_ptr=&cyedgeData[idx]) for idx in range(_graph_length)] + if cyedgeData is not NULL: + free(cyedgeData) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None, None, None) return (_CUresult_SUCCESS, pydependencies, pyedgeData, numDependencies) @@ -46532,36 +46943,38 @@ def cuGraphNodeGetDependentNodes(hNode, size_t numDependentNodes = 0): :py:obj:`~.cuGraphNodeGetDependencies`, :py:obj:`~.cuGraphGetNodes`, :py:obj:`~.cuGraphGetRootNodes`, :py:obj:`~.cuGraphGetEdges`, :py:obj:`~.cuGraphAddDependencies`, :py:obj:`~.cuGraphRemoveDependencies` """ cdef size_t _graph_length = numDependentNodes - cdef cydriver.CUgraphNode cyhNode - if hNode is None: - phNode = 0 - elif isinstance(hNode, (CUgraphNode,)): - phNode = int(hNode) - else: - phNode = int(CUgraphNode(hNode)) - cyhNode = phNode - cdef cydriver.CUgraphNode* cydependentNodes = NULL - pydependentNodes = [] - if _graph_length != 0: - cydependentNodes = calloc(_graph_length, sizeof(cydriver.CUgraphNode)) - if cydependentNodes is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode))) cdef cydriver.CUgraphEdgeData* cyedgeData = NULL pyedgeData = [] - if _graph_length != 0: - cyedgeData = calloc(_graph_length, sizeof(cydriver.CUgraphEdgeData)) - if cyedgeData is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphEdgeData))) - with nogil: - err = cydriver.cuGraphNodeGetDependentNodes(cyhNode, cydependentNodes, cyedgeData, &numDependentNodes) - if CUresult(err) == CUresult(0): - pydependentNodes = [CUgraphNode(init_value=cydependentNodes[idx]) for idx in range(_graph_length)] - if cydependentNodes is not NULL: - free(cydependentNodes) - if CUresult(err) == CUresult(0): - pyedgeData = [CUgraphEdgeData(_ptr=&cyedgeData[idx]) for idx in range(_graph_length)] - if cyedgeData is not NULL: - free(cyedgeData) + cdef cydriver.CUgraphNode* cydependentNodes = NULL + pydependentNodes = [] + cdef cydriver.CUgraphNode cyhNode + try: + if hNode is None: + phNode = 0 + elif isinstance(hNode, (CUgraphNode,)): + phNode = int(hNode) + else: + phNode = int(CUgraphNode(hNode)) + cyhNode = phNode + if _graph_length != 0: + cydependentNodes = calloc(_graph_length, sizeof(cydriver.CUgraphNode)) + if cydependentNodes is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphNode))) + if _graph_length != 0: + cyedgeData = calloc(_graph_length, sizeof(cydriver.CUgraphEdgeData)) + if cyedgeData is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cydriver.CUgraphEdgeData))) + with nogil: + err = cydriver.cuGraphNodeGetDependentNodes(cyhNode, cydependentNodes, cyedgeData, &numDependentNodes) + finally: + if CUresult(err) == CUresult(0): + pydependentNodes = [CUgraphNode(init_value=cydependentNodes[idx]) for idx in range(_graph_length)] + if cydependentNodes is not NULL: + free(cydependentNodes) + if CUresult(err) == CUresult(0): + pyedgeData = [CUgraphEdgeData(_ptr=&cyedgeData[idx]) for idx in range(_graph_length)] + if cyedgeData is not NULL: + free(cyedgeData) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None, None, None) return (_CUresult_SUCCESS, pydependentNodes, pyedgeData, numDependentNodes) @@ -46603,60 +47016,62 @@ def cuGraphAddDependencies(hGraph, from_ : Optional[tuple[CUgraphNode] | list[CU -------- :py:obj:`~.cuGraphRemoveDependencies`, :py:obj:`~.cuGraphGetEdges`, :py:obj:`~.cuGraphNodeGetDependencies`, :py:obj:`~.cuGraphNodeGetDependentNodes` """ - edgeData = [] if edgeData is None else edgeData - if not all(isinstance(_x, (CUgraphEdgeData,)) for _x in edgeData): - raise TypeError("Argument 'edgeData' is not instance of type (expected tuple[cydriver.CUgraphEdgeData,] or list[cydriver.CUgraphEdgeData,]") - to = [] if to is None else to - if not all(isinstance(_x, (CUgraphNode,)) for _x in to): - raise TypeError("Argument 'to' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") - from_ = [] if from_ is None else from_ - if not all(isinstance(_x, (CUgraphNode,)) for _x in from_): - raise TypeError("Argument 'from_' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") - cdef cydriver.CUgraph cyhGraph - if hGraph is None: - phGraph = 0 - elif isinstance(hGraph, (CUgraph,)): - phGraph = int(hGraph) - else: - phGraph = int(CUgraph(hGraph)) - cyhGraph = phGraph - cdef cydriver.CUgraphNode* cyfrom_ = NULL - if len(from_) > 1: - cyfrom_ = calloc(len(from_), sizeof(cydriver.CUgraphNode)) - if cyfrom_ is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cydriver.CUgraphNode))) - else: - for idx in range(len(from_)): - cyfrom_[idx] = (from_[idx])._pvt_ptr[0] - elif len(from_) == 1: - cyfrom_ = (from_[0])._pvt_ptr - cdef cydriver.CUgraphNode* cyto = NULL - if len(to) > 1: - cyto = calloc(len(to), sizeof(cydriver.CUgraphNode)) - if cyto is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cydriver.CUgraphNode))) - else: - for idx in range(len(to)): - cyto[idx] = (to[idx])._pvt_ptr[0] - elif len(to) == 1: - cyto = (to[0])._pvt_ptr cdef cydriver.CUgraphEdgeData* cyedgeData = NULL - if len(edgeData) > 1: - cyedgeData = calloc(len(edgeData), sizeof(cydriver.CUgraphEdgeData)) - if cyedgeData is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(edgeData)) + 'x' + str(sizeof(cydriver.CUgraphEdgeData))) - for idx in range(len(edgeData)): - string.memcpy(&cyedgeData[idx], (edgeData[idx])._pvt_ptr, sizeof(cydriver.CUgraphEdgeData)) - elif len(edgeData) == 1: - cyedgeData = (edgeData[0])._pvt_ptr - with nogil: - err = cydriver.cuGraphAddDependencies(cyhGraph, cyfrom_, cyto, cyedgeData, numDependencies) - if len(from_) > 1 and cyfrom_ is not NULL: - free(cyfrom_) - if len(to) > 1 and cyto is not NULL: - free(cyto) - if len(edgeData) > 1 and cyedgeData is not NULL: - free(cyedgeData) + cdef cydriver.CUgraphNode* cyto = NULL + cdef cydriver.CUgraphNode* cyfrom_ = NULL + cdef cydriver.CUgraph cyhGraph + try: + if hGraph is None: + phGraph = 0 + elif isinstance(hGraph, (CUgraph,)): + phGraph = int(hGraph) + else: + phGraph = int(CUgraph(hGraph)) + cyhGraph = phGraph + from_ = [] if from_ is None else from_ + if not all(isinstance(_x, (CUgraphNode,)) for _x in from_): + raise TypeError("Argument 'from_' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") + if len(from_) > 1: + cyfrom_ = calloc(len(from_), sizeof(cydriver.CUgraphNode)) + if cyfrom_ is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cydriver.CUgraphNode))) + else: + for idx in range(len(from_)): + cyfrom_[idx] = (from_[idx])._pvt_ptr[0] + elif len(from_) == 1: + cyfrom_ = (from_[0])._pvt_ptr + to = [] if to is None else to + if not all(isinstance(_x, (CUgraphNode,)) for _x in to): + raise TypeError("Argument 'to' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") + if len(to) > 1: + cyto = calloc(len(to), sizeof(cydriver.CUgraphNode)) + if cyto is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cydriver.CUgraphNode))) + else: + for idx in range(len(to)): + cyto[idx] = (to[idx])._pvt_ptr[0] + elif len(to) == 1: + cyto = (to[0])._pvt_ptr + edgeData = [] if edgeData is None else edgeData + if not all(isinstance(_x, (CUgraphEdgeData,)) for _x in edgeData): + raise TypeError("Argument 'edgeData' is not instance of type (expected tuple[cydriver.CUgraphEdgeData,] or list[cydriver.CUgraphEdgeData,]") + if len(edgeData) > 1: + cyedgeData = calloc(len(edgeData), sizeof(cydriver.CUgraphEdgeData)) + if cyedgeData is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(edgeData)) + 'x' + str(sizeof(cydriver.CUgraphEdgeData))) + for idx in range(len(edgeData)): + string.memcpy(&cyedgeData[idx], (edgeData[idx])._pvt_ptr, sizeof(cydriver.CUgraphEdgeData)) + elif len(edgeData) == 1: + cyedgeData = (edgeData[0])._pvt_ptr + with nogil: + err = cydriver.cuGraphAddDependencies(cyhGraph, cyfrom_, cyto, cyedgeData, numDependencies) + finally: + if len(from_) > 1 and cyfrom_ is not NULL: + free(cyfrom_) + if len(to) > 1 and cyto is not NULL: + free(cyto) + if len(edgeData) > 1 and cyedgeData is not NULL: + free(cyedgeData) return (_CUresult(err),) {{endif}} @@ -46702,60 +47117,62 @@ def cuGraphRemoveDependencies(hGraph, from_ : Optional[tuple[CUgraphNode] | list -------- :py:obj:`~.cuGraphAddDependencies`, :py:obj:`~.cuGraphGetEdges`, :py:obj:`~.cuGraphNodeGetDependencies`, :py:obj:`~.cuGraphNodeGetDependentNodes` """ - edgeData = [] if edgeData is None else edgeData - if not all(isinstance(_x, (CUgraphEdgeData,)) for _x in edgeData): - raise TypeError("Argument 'edgeData' is not instance of type (expected tuple[cydriver.CUgraphEdgeData,] or list[cydriver.CUgraphEdgeData,]") - to = [] if to is None else to - if not all(isinstance(_x, (CUgraphNode,)) for _x in to): - raise TypeError("Argument 'to' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") - from_ = [] if from_ is None else from_ - if not all(isinstance(_x, (CUgraphNode,)) for _x in from_): - raise TypeError("Argument 'from_' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") - cdef cydriver.CUgraph cyhGraph - if hGraph is None: - phGraph = 0 - elif isinstance(hGraph, (CUgraph,)): - phGraph = int(hGraph) - else: - phGraph = int(CUgraph(hGraph)) - cyhGraph = phGraph - cdef cydriver.CUgraphNode* cyfrom_ = NULL - if len(from_) > 1: - cyfrom_ = calloc(len(from_), sizeof(cydriver.CUgraphNode)) - if cyfrom_ is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cydriver.CUgraphNode))) - else: - for idx in range(len(from_)): - cyfrom_[idx] = (from_[idx])._pvt_ptr[0] - elif len(from_) == 1: - cyfrom_ = (from_[0])._pvt_ptr - cdef cydriver.CUgraphNode* cyto = NULL - if len(to) > 1: - cyto = calloc(len(to), sizeof(cydriver.CUgraphNode)) - if cyto is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cydriver.CUgraphNode))) - else: - for idx in range(len(to)): - cyto[idx] = (to[idx])._pvt_ptr[0] - elif len(to) == 1: - cyto = (to[0])._pvt_ptr cdef cydriver.CUgraphEdgeData* cyedgeData = NULL - if len(edgeData) > 1: - cyedgeData = calloc(len(edgeData), sizeof(cydriver.CUgraphEdgeData)) - if cyedgeData is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(edgeData)) + 'x' + str(sizeof(cydriver.CUgraphEdgeData))) - for idx in range(len(edgeData)): - string.memcpy(&cyedgeData[idx], (edgeData[idx])._pvt_ptr, sizeof(cydriver.CUgraphEdgeData)) - elif len(edgeData) == 1: - cyedgeData = (edgeData[0])._pvt_ptr - with nogil: - err = cydriver.cuGraphRemoveDependencies(cyhGraph, cyfrom_, cyto, cyedgeData, numDependencies) - if len(from_) > 1 and cyfrom_ is not NULL: - free(cyfrom_) - if len(to) > 1 and cyto is not NULL: - free(cyto) - if len(edgeData) > 1 and cyedgeData is not NULL: - free(cyedgeData) + cdef cydriver.CUgraphNode* cyto = NULL + cdef cydriver.CUgraphNode* cyfrom_ = NULL + cdef cydriver.CUgraph cyhGraph + try: + if hGraph is None: + phGraph = 0 + elif isinstance(hGraph, (CUgraph,)): + phGraph = int(hGraph) + else: + phGraph = int(CUgraph(hGraph)) + cyhGraph = phGraph + from_ = [] if from_ is None else from_ + if not all(isinstance(_x, (CUgraphNode,)) for _x in from_): + raise TypeError("Argument 'from_' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") + if len(from_) > 1: + cyfrom_ = calloc(len(from_), sizeof(cydriver.CUgraphNode)) + if cyfrom_ is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cydriver.CUgraphNode))) + else: + for idx in range(len(from_)): + cyfrom_[idx] = (from_[idx])._pvt_ptr[0] + elif len(from_) == 1: + cyfrom_ = (from_[0])._pvt_ptr + to = [] if to is None else to + if not all(isinstance(_x, (CUgraphNode,)) for _x in to): + raise TypeError("Argument 'to' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") + if len(to) > 1: + cyto = calloc(len(to), sizeof(cydriver.CUgraphNode)) + if cyto is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cydriver.CUgraphNode))) + else: + for idx in range(len(to)): + cyto[idx] = (to[idx])._pvt_ptr[0] + elif len(to) == 1: + cyto = (to[0])._pvt_ptr + edgeData = [] if edgeData is None else edgeData + if not all(isinstance(_x, (CUgraphEdgeData,)) for _x in edgeData): + raise TypeError("Argument 'edgeData' is not instance of type (expected tuple[cydriver.CUgraphEdgeData,] or list[cydriver.CUgraphEdgeData,]") + if len(edgeData) > 1: + cyedgeData = calloc(len(edgeData), sizeof(cydriver.CUgraphEdgeData)) + if cyedgeData is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(edgeData)) + 'x' + str(sizeof(cydriver.CUgraphEdgeData))) + for idx in range(len(edgeData)): + string.memcpy(&cyedgeData[idx], (edgeData[idx])._pvt_ptr, sizeof(cydriver.CUgraphEdgeData)) + elif len(edgeData) == 1: + cyedgeData = (edgeData[0])._pvt_ptr + with nogil: + err = cydriver.cuGraphRemoveDependencies(cyhGraph, cyfrom_, cyto, cyedgeData, numDependencies) + finally: + if len(from_) > 1 and cyfrom_ is not NULL: + free(cyfrom_) + if len(to) > 1 and cyto is not NULL: + free(cyto) + if len(edgeData) > 1 and cyedgeData is not NULL: + free(cyedgeData) return (_CUresult(err),) {{endif}} @@ -46888,6 +47305,8 @@ def cuGraphInstantiate(hGraph, unsigned long long flags): :py:obj:`~.cuGraphInstantiate`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphUpload`, :py:obj:`~.cuGraphLaunch`, :py:obj:`~.cuGraphExecDestroy` """ cdef cydriver.CUgraph cyhGraph + cdef CUgraphExec phGraphExec + phGraphExec = CUgraphExec() if hGraph is None: phGraph = 0 elif isinstance(hGraph, (CUgraph,)): @@ -46895,7 +47314,6 @@ def cuGraphInstantiate(hGraph, unsigned long long flags): else: phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph - cdef CUgraphExec phGraphExec = CUgraphExec() with nogil: err = cydriver.cuGraphInstantiate(phGraphExec._pvt_ptr, cyhGraph, flags) if err != cydriver.CUDA_SUCCESS: @@ -47032,7 +47450,10 @@ def cuGraphInstantiateWithParams(hGraph, instantiateParams : Optional[CUDA_GRAPH -------- :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphInstantiate`, :py:obj:`~.cuGraphExecDestroy` """ + cdef cydriver.CUDA_GRAPH_INSTANTIATE_PARAMS* cyinstantiateParams_ptr cdef cydriver.CUgraph cyhGraph + cdef CUgraphExec phGraphExec + phGraphExec = CUgraphExec() if hGraph is None: phGraph = 0 elif isinstance(hGraph, (CUgraph,)): @@ -47040,8 +47461,7 @@ def cuGraphInstantiateWithParams(hGraph, instantiateParams : Optional[CUDA_GRAPH else: phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph - cdef CUgraphExec phGraphExec = CUgraphExec() - cdef cydriver.CUDA_GRAPH_INSTANTIATE_PARAMS* cyinstantiateParams_ptr = instantiateParams._pvt_ptr if instantiateParams is not None else NULL + cyinstantiateParams_ptr = instantiateParams._pvt_ptr if instantiateParams is not None else NULL with nogil: err = cydriver.cuGraphInstantiateWithParams(phGraphExec._pvt_ptr, cyhGraph, cyinstantiateParams_ptr) if err != cydriver.CUDA_SUCCESS: @@ -47076,6 +47496,7 @@ def cuGraphExecGetFlags(hGraphExec): -------- :py:obj:`~.cuGraphInstantiate`, :py:obj:`~.cuGraphInstantiateWithParams` """ + cdef cuuint64_t flags cdef cydriver.CUgraphExec cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -47084,7 +47505,7 @@ def cuGraphExecGetFlags(hGraphExec): else: phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec - cdef cuuint64_t flags = cuuint64_t() + flags = cuuint64_t() with nogil: err = cydriver.cuGraphExecGetFlags(cyhGraphExec, flags._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -47151,14 +47572,8 @@ def cuGraphExecKernelNodeSetParams(hGraphExec, hNode, nodeParams : Optional[CUDA -------- :py:obj:`~.cuGraphExecNodeSetParams`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphKernelNodeSetParams`, :py:obj:`~.cuGraphExecMemcpyNodeSetParams`, :py:obj:`~.cuGraphExecMemsetNodeSetParams`, :py:obj:`~.cuGraphExecHostNodeSetParams`, :py:obj:`~.cuGraphExecChildGraphNodeSetParams`, :py:obj:`~.cuGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cuGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cuGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cuGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cuGraphExecUpdate`, :py:obj:`~.cuGraphInstantiate` """ + cdef cydriver.CUDA_KERNEL_NODE_PARAMS* cynodeParams_ptr cdef cydriver.CUgraphNode cyhNode - if hNode is None: - phNode = 0 - elif isinstance(hNode, (CUgraphNode,)): - phNode = int(hNode) - else: - phNode = int(CUgraphNode(hNode)) - cyhNode = phNode cdef cydriver.CUgraphExec cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -47167,7 +47582,14 @@ def cuGraphExecKernelNodeSetParams(hGraphExec, hNode, nodeParams : Optional[CUDA else: phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec - cdef cydriver.CUDA_KERNEL_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + if hNode is None: + phNode = 0 + elif isinstance(hNode, (CUgraphNode,)): + phNode = int(hNode) + else: + phNode = int(CUgraphNode(hNode)) + cyhNode = phNode + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL with nogil: err = cydriver.cuGraphExecKernelNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr) return (_CUresult(err),) @@ -47219,21 +47641,8 @@ def cuGraphExecMemcpyNodeSetParams(hGraphExec, hNode, copyParams : Optional[CUDA :py:obj:`~.cuGraphExecNodeSetParams`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphMemcpyNodeSetParams`, :py:obj:`~.cuGraphExecKernelNodeSetParams`, :py:obj:`~.cuGraphExecMemsetNodeSetParams`, :py:obj:`~.cuGraphExecHostNodeSetParams`, :py:obj:`~.cuGraphExecChildGraphNodeSetParams`, :py:obj:`~.cuGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cuGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cuGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cuGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cuGraphExecUpdate`, :py:obj:`~.cuGraphInstantiate` """ cdef cydriver.CUcontext cyctx - if ctx is None: - pctx = 0 - elif isinstance(ctx, (CUcontext,)): - pctx = int(ctx) - else: - pctx = int(CUcontext(ctx)) - cyctx = pctx + cdef cydriver.CUDA_MEMCPY3D* cycopyParams_ptr cdef cydriver.CUgraphNode cyhNode - if hNode is None: - phNode = 0 - elif isinstance(hNode, (CUgraphNode,)): - phNode = int(hNode) - else: - phNode = int(CUgraphNode(hNode)) - cyhNode = phNode cdef cydriver.CUgraphExec cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -47242,7 +47651,21 @@ def cuGraphExecMemcpyNodeSetParams(hGraphExec, hNode, copyParams : Optional[CUDA else: phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec - cdef cydriver.CUDA_MEMCPY3D* cycopyParams_ptr = copyParams._pvt_ptr if copyParams is not None else NULL + if hNode is None: + phNode = 0 + elif isinstance(hNode, (CUgraphNode,)): + phNode = int(hNode) + else: + phNode = int(CUgraphNode(hNode)) + cyhNode = phNode + cycopyParams_ptr = copyParams._pvt_ptr if copyParams is not None else NULL + if ctx is None: + pctx = 0 + elif isinstance(ctx, (CUcontext,)): + pctx = int(ctx) + else: + pctx = int(CUcontext(ctx)) + cyctx = pctx with nogil: err = cydriver.cuGraphExecMemcpyNodeSetParams(cyhGraphExec, cyhNode, cycopyParams_ptr, cyctx) return (_CUresult(err),) @@ -47299,21 +47722,8 @@ def cuGraphExecMemsetNodeSetParams(hGraphExec, hNode, memsetParams : Optional[CU :py:obj:`~.cuGraphExecNodeSetParams`, :py:obj:`~.cuGraphAddMemsetNode`, :py:obj:`~.cuGraphMemsetNodeSetParams`, :py:obj:`~.cuGraphExecKernelNodeSetParams`, :py:obj:`~.cuGraphExecMemcpyNodeSetParams`, :py:obj:`~.cuGraphExecHostNodeSetParams`, :py:obj:`~.cuGraphExecChildGraphNodeSetParams`, :py:obj:`~.cuGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cuGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cuGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cuGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cuGraphExecUpdate`, :py:obj:`~.cuGraphInstantiate` """ cdef cydriver.CUcontext cyctx - if ctx is None: - pctx = 0 - elif isinstance(ctx, (CUcontext,)): - pctx = int(ctx) - else: - pctx = int(CUcontext(ctx)) - cyctx = pctx + cdef cydriver.CUDA_MEMSET_NODE_PARAMS* cymemsetParams_ptr cdef cydriver.CUgraphNode cyhNode - if hNode is None: - phNode = 0 - elif isinstance(hNode, (CUgraphNode,)): - phNode = int(hNode) - else: - phNode = int(CUgraphNode(hNode)) - cyhNode = phNode cdef cydriver.CUgraphExec cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -47322,7 +47732,21 @@ def cuGraphExecMemsetNodeSetParams(hGraphExec, hNode, memsetParams : Optional[CU else: phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec - cdef cydriver.CUDA_MEMSET_NODE_PARAMS* cymemsetParams_ptr = memsetParams._pvt_ptr if memsetParams is not None else NULL + if hNode is None: + phNode = 0 + elif isinstance(hNode, (CUgraphNode,)): + phNode = int(hNode) + else: + phNode = int(CUgraphNode(hNode)) + cyhNode = phNode + cymemsetParams_ptr = memsetParams._pvt_ptr if memsetParams is not None else NULL + if ctx is None: + pctx = 0 + elif isinstance(ctx, (CUcontext,)): + pctx = int(ctx) + else: + pctx = int(CUcontext(ctx)) + cyctx = pctx with nogil: err = cydriver.cuGraphExecMemsetNodeSetParams(cyhGraphExec, cyhNode, cymemsetParams_ptr, cyctx) return (_CUresult(err),) @@ -47361,14 +47785,8 @@ def cuGraphExecHostNodeSetParams(hGraphExec, hNode, nodeParams : Optional[CUDA_H -------- :py:obj:`~.cuGraphExecNodeSetParams`, :py:obj:`~.cuGraphAddHostNode`, :py:obj:`~.cuGraphHostNodeSetParams`, :py:obj:`~.cuGraphExecKernelNodeSetParams`, :py:obj:`~.cuGraphExecMemcpyNodeSetParams`, :py:obj:`~.cuGraphExecMemsetNodeSetParams`, :py:obj:`~.cuGraphExecChildGraphNodeSetParams`, :py:obj:`~.cuGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cuGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cuGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cuGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cuGraphExecUpdate`, :py:obj:`~.cuGraphInstantiate` """ + cdef cydriver.CUDA_HOST_NODE_PARAMS* cynodeParams_ptr cdef cydriver.CUgraphNode cyhNode - if hNode is None: - phNode = 0 - elif isinstance(hNode, (CUgraphNode,)): - phNode = int(hNode) - else: - phNode = int(CUgraphNode(hNode)) - cyhNode = phNode cdef cydriver.CUgraphExec cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -47377,7 +47795,14 @@ def cuGraphExecHostNodeSetParams(hGraphExec, hNode, nodeParams : Optional[CUDA_H else: phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec - cdef cydriver.CUDA_HOST_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + if hNode is None: + phNode = 0 + elif isinstance(hNode, (CUgraphNode,)): + phNode = int(hNode) + else: + phNode = int(CUgraphNode(hNode)) + cyhNode = phNode + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL with nogil: err = cydriver.cuGraphExecHostNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr) return (_CUresult(err),) @@ -47425,21 +47850,7 @@ def cuGraphExecChildGraphNodeSetParams(hGraphExec, hNode, childGraph): :py:obj:`~.cuGraphExecNodeSetParams`, :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphChildGraphNodeGetGraph`, :py:obj:`~.cuGraphExecKernelNodeSetParams`, :py:obj:`~.cuGraphExecMemcpyNodeSetParams`, :py:obj:`~.cuGraphExecMemsetNodeSetParams`, :py:obj:`~.cuGraphExecHostNodeSetParams`, :py:obj:`~.cuGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cuGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cuGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cuGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cuGraphExecUpdate`, :py:obj:`~.cuGraphInstantiate` """ cdef cydriver.CUgraph cychildGraph - if childGraph is None: - pchildGraph = 0 - elif isinstance(childGraph, (CUgraph,)): - pchildGraph = int(childGraph) - else: - pchildGraph = int(CUgraph(childGraph)) - cychildGraph = pchildGraph cdef cydriver.CUgraphNode cyhNode - if hNode is None: - phNode = 0 - elif isinstance(hNode, (CUgraphNode,)): - phNode = int(hNode) - else: - phNode = int(CUgraphNode(hNode)) - cyhNode = phNode cdef cydriver.CUgraphExec cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -47448,6 +47859,20 @@ def cuGraphExecChildGraphNodeSetParams(hGraphExec, hNode, childGraph): else: phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec + if hNode is None: + phNode = 0 + elif isinstance(hNode, (CUgraphNode,)): + phNode = int(hNode) + else: + phNode = int(CUgraphNode(hNode)) + cyhNode = phNode + if childGraph is None: + pchildGraph = 0 + elif isinstance(childGraph, (CUgraph,)): + pchildGraph = int(childGraph) + else: + pchildGraph = int(CUgraph(childGraph)) + cychildGraph = pchildGraph with nogil: err = cydriver.cuGraphExecChildGraphNodeSetParams(cyhGraphExec, cyhNode, cychildGraph) return (_CUresult(err),) @@ -47488,21 +47913,7 @@ def cuGraphExecEventRecordNodeSetEvent(hGraphExec, hNode, event): :py:obj:`~.cuGraphExecNodeSetParams`, :py:obj:`~.cuGraphAddEventRecordNode`, :py:obj:`~.cuGraphEventRecordNodeGetEvent`, :py:obj:`~.cuGraphEventWaitNodeSetEvent`, :py:obj:`~.cuEventRecordWithFlags`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuGraphExecKernelNodeSetParams`, :py:obj:`~.cuGraphExecMemcpyNodeSetParams`, :py:obj:`~.cuGraphExecMemsetNodeSetParams`, :py:obj:`~.cuGraphExecHostNodeSetParams`, :py:obj:`~.cuGraphExecChildGraphNodeSetParams`, :py:obj:`~.cuGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cuGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cuGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cuGraphExecUpdate`, :py:obj:`~.cuGraphInstantiate` """ cdef cydriver.CUevent cyevent - if event is None: - pevent = 0 - elif isinstance(event, (CUevent,)): - pevent = int(event) - else: - pevent = int(CUevent(event)) - cyevent = pevent cdef cydriver.CUgraphNode cyhNode - if hNode is None: - phNode = 0 - elif isinstance(hNode, (CUgraphNode,)): - phNode = int(hNode) - else: - phNode = int(CUgraphNode(hNode)) - cyhNode = phNode cdef cydriver.CUgraphExec cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -47511,6 +47922,20 @@ def cuGraphExecEventRecordNodeSetEvent(hGraphExec, hNode, event): else: phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec + if hNode is None: + phNode = 0 + elif isinstance(hNode, (CUgraphNode,)): + phNode = int(hNode) + else: + phNode = int(CUgraphNode(hNode)) + cyhNode = phNode + if event is None: + pevent = 0 + elif isinstance(event, (CUevent,)): + pevent = int(event) + else: + pevent = int(CUevent(event)) + cyevent = pevent with nogil: err = cydriver.cuGraphExecEventRecordNodeSetEvent(cyhGraphExec, cyhNode, cyevent) return (_CUresult(err),) @@ -47551,21 +47976,7 @@ def cuGraphExecEventWaitNodeSetEvent(hGraphExec, hNode, event): :py:obj:`~.cuGraphExecNodeSetParams`, :py:obj:`~.cuGraphAddEventWaitNode`, :py:obj:`~.cuGraphEventWaitNodeGetEvent`, :py:obj:`~.cuGraphEventRecordNodeSetEvent`, :py:obj:`~.cuEventRecordWithFlags`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuGraphExecKernelNodeSetParams`, :py:obj:`~.cuGraphExecMemcpyNodeSetParams`, :py:obj:`~.cuGraphExecMemsetNodeSetParams`, :py:obj:`~.cuGraphExecHostNodeSetParams`, :py:obj:`~.cuGraphExecChildGraphNodeSetParams`, :py:obj:`~.cuGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cuGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cuGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cuGraphExecUpdate`, :py:obj:`~.cuGraphInstantiate` """ cdef cydriver.CUevent cyevent - if event is None: - pevent = 0 - elif isinstance(event, (CUevent,)): - pevent = int(event) - else: - pevent = int(CUevent(event)) - cyevent = pevent cdef cydriver.CUgraphNode cyhNode - if hNode is None: - phNode = 0 - elif isinstance(hNode, (CUgraphNode,)): - phNode = int(hNode) - else: - phNode = int(CUgraphNode(hNode)) - cyhNode = phNode cdef cydriver.CUgraphExec cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -47574,6 +47985,20 @@ def cuGraphExecEventWaitNodeSetEvent(hGraphExec, hNode, event): else: phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec + if hNode is None: + phNode = 0 + elif isinstance(hNode, (CUgraphNode,)): + phNode = int(hNode) + else: + phNode = int(CUgraphNode(hNode)) + cyhNode = phNode + if event is None: + pevent = 0 + elif isinstance(event, (CUevent,)): + pevent = int(event) + else: + pevent = int(CUevent(event)) + cyevent = pevent with nogil: err = cydriver.cuGraphExecEventWaitNodeSetEvent(cyhGraphExec, cyhNode, cyevent) return (_CUresult(err),) @@ -47617,14 +48042,8 @@ def cuGraphExecExternalSemaphoresSignalNodeSetParams(hGraphExec, hNode, nodePara -------- :py:obj:`~.cuGraphExecNodeSetParams`, :py:obj:`~.cuGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cuImportExternalSemaphore`, :py:obj:`~.cuSignalExternalSemaphoresAsync`, :py:obj:`~.cuWaitExternalSemaphoresAsync`, :py:obj:`~.cuGraphExecKernelNodeSetParams`, :py:obj:`~.cuGraphExecMemcpyNodeSetParams`, :py:obj:`~.cuGraphExecMemsetNodeSetParams`, :py:obj:`~.cuGraphExecHostNodeSetParams`, :py:obj:`~.cuGraphExecChildGraphNodeSetParams`, :py:obj:`~.cuGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cuGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cuGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cuGraphExecUpdate`, :py:obj:`~.cuGraphInstantiate` """ + cdef cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* cynodeParams_ptr cdef cydriver.CUgraphNode cyhNode - if hNode is None: - phNode = 0 - elif isinstance(hNode, (CUgraphNode,)): - phNode = int(hNode) - else: - phNode = int(CUgraphNode(hNode)) - cyhNode = phNode cdef cydriver.CUgraphExec cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -47633,7 +48052,14 @@ def cuGraphExecExternalSemaphoresSignalNodeSetParams(hGraphExec, hNode, nodePara else: phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec - cdef cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + if hNode is None: + phNode = 0 + elif isinstance(hNode, (CUgraphNode,)): + phNode = int(hNode) + else: + phNode = int(CUgraphNode(hNode)) + cyhNode = phNode + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL with nogil: err = cydriver.cuGraphExecExternalSemaphoresSignalNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr) return (_CUresult(err),) @@ -47677,14 +48103,8 @@ def cuGraphExecExternalSemaphoresWaitNodeSetParams(hGraphExec, hNode, nodeParams -------- :py:obj:`~.cuGraphExecNodeSetParams`, :py:obj:`~.cuGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cuImportExternalSemaphore`, :py:obj:`~.cuSignalExternalSemaphoresAsync`, :py:obj:`~.cuWaitExternalSemaphoresAsync`, :py:obj:`~.cuGraphExecKernelNodeSetParams`, :py:obj:`~.cuGraphExecMemcpyNodeSetParams`, :py:obj:`~.cuGraphExecMemsetNodeSetParams`, :py:obj:`~.cuGraphExecHostNodeSetParams`, :py:obj:`~.cuGraphExecChildGraphNodeSetParams`, :py:obj:`~.cuGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cuGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cuGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cuGraphExecUpdate`, :py:obj:`~.cuGraphInstantiate` """ + cdef cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS* cynodeParams_ptr cdef cydriver.CUgraphNode cyhNode - if hNode is None: - phNode = 0 - elif isinstance(hNode, (CUgraphNode,)): - phNode = int(hNode) - else: - phNode = int(CUgraphNode(hNode)) - cyhNode = phNode cdef cydriver.CUgraphExec cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -47693,7 +48113,14 @@ def cuGraphExecExternalSemaphoresWaitNodeSetParams(hGraphExec, hNode, nodeParams else: phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec - cdef cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + if hNode is None: + phNode = 0 + elif isinstance(hNode, (CUgraphNode,)): + phNode = int(hNode) + else: + phNode = int(CUgraphNode(hNode)) + cyhNode = phNode + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL with nogil: err = cydriver.cuGraphExecExternalSemaphoresWaitNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr) return (_CUresult(err),) @@ -47748,13 +48175,6 @@ def cuGraphNodeSetEnabled(hGraphExec, hNode, unsigned int isEnabled): Currently only kernel, memset and memcpy nodes are supported. """ cdef cydriver.CUgraphNode cyhNode - if hNode is None: - phNode = 0 - elif isinstance(hNode, (CUgraphNode,)): - phNode = int(hNode) - else: - phNode = int(CUgraphNode(hNode)) - cyhNode = phNode cdef cydriver.CUgraphExec cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -47763,6 +48183,13 @@ def cuGraphNodeSetEnabled(hGraphExec, hNode, unsigned int isEnabled): else: phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec + if hNode is None: + phNode = 0 + elif isinstance(hNode, (CUgraphNode,)): + phNode = int(hNode) + else: + phNode = int(CUgraphNode(hNode)) + cyhNode = phNode with nogil: err = cydriver.cuGraphNodeSetEnabled(cyhGraphExec, cyhNode, isEnabled) return (_CUresult(err),) @@ -47805,14 +48232,8 @@ def cuGraphNodeGetEnabled(hGraphExec, hNode): This function will not reflect device-side updates for device-updatable kernel nodes. """ + cdef unsigned int isEnabled = 0 cdef cydriver.CUgraphNode cyhNode - if hNode is None: - phNode = 0 - elif isinstance(hNode, (CUgraphNode,)): - phNode = int(hNode) - else: - phNode = int(CUgraphNode(hNode)) - cyhNode = phNode cdef cydriver.CUgraphExec cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -47821,7 +48242,13 @@ def cuGraphNodeGetEnabled(hGraphExec, hNode): else: phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec - cdef unsigned int isEnabled = 0 + if hNode is None: + phNode = 0 + elif isinstance(hNode, (CUgraphNode,)): + phNode = int(hNode) + else: + phNode = int(CUgraphNode(hNode)) + cyhNode = phNode with nogil: err = cydriver.cuGraphNodeGetEnabled(cyhGraphExec, cyhNode, &isEnabled) if err != cydriver.CUDA_SUCCESS: @@ -47858,13 +48285,6 @@ def cuGraphUpload(hGraphExec, hStream): :py:obj:`~.cuGraphInstantiate`, :py:obj:`~.cuGraphLaunch`, :py:obj:`~.cuGraphExecDestroy` """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream cdef cydriver.CUgraphExec cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -47873,6 +48293,13 @@ def cuGraphUpload(hGraphExec, hStream): else: phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream with nogil: err = cydriver.cuGraphUpload(cyhGraphExec, cyhStream) return (_CUresult(err),) @@ -47912,13 +48339,6 @@ def cuGraphLaunch(hGraphExec, hStream): :py:obj:`~.cuGraphInstantiate`, :py:obj:`~.cuGraphUpload`, :py:obj:`~.cuGraphExecDestroy` """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream cdef cydriver.CUgraphExec cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -47927,6 +48347,13 @@ def cuGraphLaunch(hGraphExec, hStream): else: phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream with nogil: err = cydriver.cuGraphLaunch(cyhGraphExec, cyhStream) return (_CUresult(err),) @@ -48160,14 +48587,8 @@ def cuGraphExecUpdate(hGraphExec, hGraph): -------- :py:obj:`~.cuGraphInstantiate` """ + cdef CUgraphExecUpdateResultInfo resultInfo cdef cydriver.CUgraph cyhGraph - if hGraph is None: - phGraph = 0 - elif isinstance(hGraph, (CUgraph,)): - phGraph = int(hGraph) - else: - phGraph = int(CUgraph(hGraph)) - cyhGraph = phGraph cdef cydriver.CUgraphExec cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -48176,7 +48597,14 @@ def cuGraphExecUpdate(hGraphExec, hGraph): else: phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec - cdef CUgraphExecUpdateResultInfo resultInfo = CUgraphExecUpdateResultInfo() + if hGraph is None: + phGraph = 0 + elif isinstance(hGraph, (CUgraph,)): + phGraph = int(hGraph) + else: + phGraph = int(CUgraph(hGraph)) + cyhGraph = phGraph + resultInfo = CUgraphExecUpdateResultInfo() with nogil: err = cydriver.cuGraphExecUpdate(cyhGraphExec, cyhGraph, resultInfo._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -48211,13 +48639,6 @@ def cuGraphKernelNodeCopyAttributes(dst, src): :py:obj:`~.CUaccessPolicyWindow` """ cdef cydriver.CUgraphNode cysrc - if src is None: - psrc = 0 - elif isinstance(src, (CUgraphNode,)): - psrc = int(src) - else: - psrc = int(CUgraphNode(src)) - cysrc = psrc cdef cydriver.CUgraphNode cydst if dst is None: pdst = 0 @@ -48226,6 +48647,13 @@ def cuGraphKernelNodeCopyAttributes(dst, src): else: pdst = int(CUgraphNode(dst)) cydst = pdst + if src is None: + psrc = 0 + elif isinstance(src, (CUgraphNode,)): + psrc = int(src) + else: + psrc = int(CUgraphNode(src)) + cysrc = psrc with nogil: err = cydriver.cuGraphKernelNodeCopyAttributes(cydst, cysrc) return (_CUresult(err),) @@ -48258,6 +48686,8 @@ def cuGraphKernelNodeGetAttribute(hNode, attr not None : CUkernelNodeAttrID): -------- :py:obj:`~.CUaccessPolicyWindow` """ + cdef CUkernelNodeAttrValue value_out + cdef cydriver.CUkernelNodeAttrID cyattr cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -48266,8 +48696,8 @@ def cuGraphKernelNodeGetAttribute(hNode, attr not None : CUkernelNodeAttrID): else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef cydriver.CUkernelNodeAttrID cyattr = int(attr) - cdef CUkernelNodeAttrValue value_out = CUkernelNodeAttrValue() + cyattr = int(attr) + value_out = CUkernelNodeAttrValue() with nogil: err = cydriver.cuGraphKernelNodeGetAttribute(cyhNode, cyattr, value_out._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -48302,6 +48732,8 @@ def cuGraphKernelNodeSetAttribute(hNode, attr not None : CUkernelNodeAttrID, val -------- :py:obj:`~.CUaccessPolicyWindow` """ + cdef cydriver.CUkernelNodeAttrValue* cyvalue_ptr + cdef cydriver.CUkernelNodeAttrID cyattr cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -48310,8 +48742,8 @@ def cuGraphKernelNodeSetAttribute(hNode, attr not None : CUkernelNodeAttrID, val else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef cydriver.CUkernelNodeAttrID cyattr = int(attr) - cdef cydriver.CUkernelNodeAttrValue* cyvalue_ptr = value._pvt_ptr if value is not None else NULL + cyattr = int(attr) + cyvalue_ptr = value._pvt_ptr if value is not None else NULL with nogil: err = cydriver.cuGraphKernelNodeSetAttribute(cyhNode, cyattr, cyvalue_ptr) return (_CUresult(err),) @@ -48402,19 +48834,23 @@ def cuUserObjectCreate(ptr, destroy, unsigned int initialRefcount, unsigned int :py:obj:`~.cuUserObjectRetain`, :py:obj:`~.cuUserObjectRelease`, :py:obj:`~.cuGraphRetainUserObject`, :py:obj:`~.cuGraphReleaseUserObject`, :py:obj:`~.cuGraphCreate` """ cdef cydriver.CUhostFn cydestroy - if destroy is None: - pdestroy = 0 - elif isinstance(destroy, (CUhostFn,)): - pdestroy = int(destroy) - else: - pdestroy = int(CUhostFn(destroy)) - cydestroy = pdestroy - cdef CUuserObject object_out = CUuserObject() cdef _HelperInputVoidPtrStruct cyptrHelper - cdef void* cyptr = _helper_input_void_ptr(ptr, &cyptrHelper) - with nogil: - err = cydriver.cuUserObjectCreate(object_out._pvt_ptr, cyptr, cydestroy, initialRefcount, flags) - _helper_input_void_ptr_free(&cyptrHelper) + cdef void* cyptr + cdef CUuserObject object_out + try: + object_out = CUuserObject() + cyptr = _helper_input_void_ptr(ptr, &cyptrHelper) + if destroy is None: + pdestroy = 0 + elif isinstance(destroy, (CUhostFn,)): + pdestroy = int(destroy) + else: + pdestroy = int(CUhostFn(destroy)) + cydestroy = pdestroy + with nogil: + err = cydriver.cuUserObjectCreate(object_out._pvt_ptr, cyptr, cydestroy, initialRefcount, flags) + finally: + _helper_input_void_ptr_free(&cyptrHelper) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, object_out) @@ -48543,13 +48979,6 @@ def cuGraphRetainUserObject(graph, object, unsigned int count, unsigned int flag :py:obj:`~.cuUserObjectCreate`, :py:obj:`~.cuUserObjectRetain`, :py:obj:`~.cuUserObjectRelease`, :py:obj:`~.cuGraphReleaseUserObject`, :py:obj:`~.cuGraphCreate` """ cdef cydriver.CUuserObject cyobject - if object is None: - pobject = 0 - elif isinstance(object, (CUuserObject,)): - pobject = int(object) - else: - pobject = int(CUuserObject(object)) - cyobject = pobject cdef cydriver.CUgraph cygraph if graph is None: pgraph = 0 @@ -48558,6 +48987,13 @@ def cuGraphRetainUserObject(graph, object, unsigned int count, unsigned int flag else: pgraph = int(CUgraph(graph)) cygraph = pgraph + if object is None: + pobject = 0 + elif isinstance(object, (CUuserObject,)): + pobject = int(object) + else: + pobject = int(CUuserObject(object)) + cyobject = pobject with nogil: err = cydriver.cuGraphRetainUserObject(cygraph, cyobject, count, flags) return (_CUresult(err),) @@ -48594,13 +49030,6 @@ def cuGraphReleaseUserObject(graph, object, unsigned int count): :py:obj:`~.cuUserObjectCreate`, :py:obj:`~.cuUserObjectRetain`, :py:obj:`~.cuUserObjectRelease`, :py:obj:`~.cuGraphRetainUserObject`, :py:obj:`~.cuGraphCreate` """ cdef cydriver.CUuserObject cyobject - if object is None: - pobject = 0 - elif isinstance(object, (CUuserObject,)): - pobject = int(object) - else: - pobject = int(CUuserObject(object)) - cyobject = pobject cdef cydriver.CUgraph cygraph if graph is None: pgraph = 0 @@ -48609,6 +49038,13 @@ def cuGraphReleaseUserObject(graph, object, unsigned int count): else: pgraph = int(CUgraph(graph)) cygraph = pgraph + if object is None: + pobject = 0 + elif isinstance(object, (CUuserObject,)): + pobject = int(object) + else: + pobject = int(CUuserObject(object)) + cyobject = pobject with nogil: err = cydriver.cuGraphReleaseUserObject(cygraph, cyobject, count) return (_CUresult(err),) @@ -48664,47 +49100,51 @@ def cuGraphAddNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | list[CUg -------- :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphNodeSetParams`, :py:obj:`~.cuGraphExecNodeSetParams` """ - dependencyData = [] if dependencyData is None else dependencyData - if not all(isinstance(_x, (CUgraphEdgeData,)) for _x in dependencyData): - raise TypeError("Argument 'dependencyData' is not instance of type (expected tuple[cydriver.CUgraphEdgeData,] or list[cydriver.CUgraphEdgeData,]") - dependencies = [] if dependencies is None else dependencies - if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): - raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") - cdef cydriver.CUgraph cyhGraph - if hGraph is None: - phGraph = 0 - elif isinstance(hGraph, (CUgraph,)): - phGraph = int(hGraph) - else: - phGraph = int(CUgraph(hGraph)) - cyhGraph = phGraph - cdef CUgraphNode phGraphNode = CUgraphNode() - cdef cydriver.CUgraphNode* cydependencies = NULL - if len(dependencies) > 1: - cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) - if cydependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) - else: - for idx in range(len(dependencies)): - cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] - elif len(dependencies) == 1: - cydependencies = (dependencies[0])._pvt_ptr + cdef cydriver.CUgraphNodeParams* cynodeParams_ptr cdef cydriver.CUgraphEdgeData* cydependencyData = NULL - if len(dependencyData) > 1: - cydependencyData = calloc(len(dependencyData), sizeof(cydriver.CUgraphEdgeData)) - if cydependencyData is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cydriver.CUgraphEdgeData))) - for idx in range(len(dependencyData)): - string.memcpy(&cydependencyData[idx], (dependencyData[idx])._pvt_ptr, sizeof(cydriver.CUgraphEdgeData)) - elif len(dependencyData) == 1: - cydependencyData = (dependencyData[0])._pvt_ptr - cdef cydriver.CUgraphNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL - with nogil: - err = cydriver.cuGraphAddNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, cydependencyData, numDependencies, cynodeParams_ptr) - if len(dependencies) > 1 and cydependencies is not NULL: - free(cydependencies) - if len(dependencyData) > 1 and cydependencyData is not NULL: - free(cydependencyData) + cdef cydriver.CUgraphNode* cydependencies = NULL + cdef cydriver.CUgraph cyhGraph + cdef CUgraphNode phGraphNode + try: + phGraphNode = CUgraphNode() + if hGraph is None: + phGraph = 0 + elif isinstance(hGraph, (CUgraph,)): + phGraph = int(hGraph) + else: + phGraph = int(CUgraph(hGraph)) + cyhGraph = phGraph + dependencies = [] if dependencies is None else dependencies + if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): + raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") + if len(dependencies) > 1: + cydependencies = calloc(len(dependencies), sizeof(cydriver.CUgraphNode)) + if cydependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cydriver.CUgraphNode))) + else: + for idx in range(len(dependencies)): + cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr + dependencyData = [] if dependencyData is None else dependencyData + if not all(isinstance(_x, (CUgraphEdgeData,)) for _x in dependencyData): + raise TypeError("Argument 'dependencyData' is not instance of type (expected tuple[cydriver.CUgraphEdgeData,] or list[cydriver.CUgraphEdgeData,]") + if len(dependencyData) > 1: + cydependencyData = calloc(len(dependencyData), sizeof(cydriver.CUgraphEdgeData)) + if cydependencyData is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cydriver.CUgraphEdgeData))) + for idx in range(len(dependencyData)): + string.memcpy(&cydependencyData[idx], (dependencyData[idx])._pvt_ptr, sizeof(cydriver.CUgraphEdgeData)) + elif len(dependencyData) == 1: + cydependencyData = (dependencyData[0])._pvt_ptr + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + with nogil: + err = cydriver.cuGraphAddNode(phGraphNode._pvt_ptr, cyhGraph, cydependencies, cydependencyData, numDependencies, cynodeParams_ptr) + finally: + if len(dependencies) > 1 and cydependencies is not NULL: + free(cydependencies) + if len(dependencyData) > 1 and cydependencyData is not NULL: + free(cydependencyData) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, phGraphNode) @@ -48740,6 +49180,7 @@ def cuGraphNodeSetParams(hNode, nodeParams : Optional[CUgraphNodeParams]): -------- :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuGraphExecNodeSetParams` """ + cdef cydriver.CUgraphNodeParams* cynodeParams_ptr cdef cydriver.CUgraphNode cyhNode if hNode is None: phNode = 0 @@ -48748,7 +49189,7 @@ def cuGraphNodeSetParams(hNode, nodeParams : Optional[CUgraphNodeParams]): else: phNode = int(CUgraphNode(hNode)) cyhNode = phNode - cdef cydriver.CUgraphNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL with nogil: err = cydriver.cuGraphNodeSetParams(cyhNode, cynodeParams_ptr) return (_CUresult(err),) @@ -48792,14 +49233,8 @@ def cuGraphExecNodeSetParams(hGraphExec, hNode, nodeParams : Optional[CUgraphNod -------- :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuGraphNodeSetParams` :py:obj:`~.cuGraphExecUpdate`, :py:obj:`~.cuGraphInstantiate` """ + cdef cydriver.CUgraphNodeParams* cynodeParams_ptr cdef cydriver.CUgraphNode cyhNode - if hNode is None: - phNode = 0 - elif isinstance(hNode, (CUgraphNode,)): - phNode = int(hNode) - else: - phNode = int(CUgraphNode(hNode)) - cyhNode = phNode cdef cydriver.CUgraphExec cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -48808,7 +49243,14 @@ def cuGraphExecNodeSetParams(hGraphExec, hNode, nodeParams : Optional[CUgraphNod else: phGraphExec = int(CUgraphExec(hGraphExec)) cyhGraphExec = phGraphExec - cdef cydriver.CUgraphNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + if hNode is None: + phNode = 0 + elif isinstance(hNode, (CUgraphNode,)): + phNode = int(hNode) + else: + phNode = int(CUgraphNode(hNode)) + cyhNode = phNode + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL with nogil: err = cydriver.cuGraphExecNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr) return (_CUresult(err),) @@ -48856,14 +49298,9 @@ def cuGraphConditionalHandleCreate(hGraph, ctx, unsigned int defaultLaunchValue, :py:obj:`~.cuGraphAddNode` """ cdef cydriver.CUcontext cyctx - if ctx is None: - pctx = 0 - elif isinstance(ctx, (CUcontext,)): - pctx = int(ctx) - else: - pctx = int(CUcontext(ctx)) - cyctx = pctx cdef cydriver.CUgraph cyhGraph + cdef CUgraphConditionalHandle pHandle_out + pHandle_out = CUgraphConditionalHandle() if hGraph is None: phGraph = 0 elif isinstance(hGraph, (CUgraph,)): @@ -48871,7 +49308,13 @@ def cuGraphConditionalHandleCreate(hGraph, ctx, unsigned int defaultLaunchValue, else: phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph - cdef CUgraphConditionalHandle pHandle_out = CUgraphConditionalHandle() + if ctx is None: + pctx = 0 + elif isinstance(ctx, (CUcontext,)): + pctx = int(ctx) + else: + pctx = int(CUcontext(ctx)) + cyctx = pctx with nogil: err = cydriver.cuGraphConditionalHandleCreate(pHandle_out._pvt_ptr, cyhGraph, cyctx, defaultLaunchValue, flags) if err != cydriver.CUDA_SUCCESS: @@ -48915,6 +49358,7 @@ def cuOccupancyMaxActiveBlocksPerMultiprocessor(func, int blockSize, size_t dyna :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessor` """ cdef cydriver.CUfunction cyfunc + cdef int numBlocks = 0 if func is None: pfunc = 0 elif isinstance(func, (CUfunction,)): @@ -48922,7 +49366,6 @@ def cuOccupancyMaxActiveBlocksPerMultiprocessor(func, int blockSize, size_t dyna else: pfunc = int(CUfunction(func)) cyfunc = pfunc - cdef int numBlocks = 0 with nogil: err = cydriver.cuOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks, cyfunc, blockSize, dynamicSMemSize) if err != cydriver.CUDA_SUCCESS: @@ -48984,6 +49427,7 @@ def cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(func, int blockSize, si :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` """ cdef cydriver.CUfunction cyfunc + cdef int numBlocks = 0 if func is None: pfunc = 0 elif isinstance(func, (CUfunction,)): @@ -48991,7 +49435,6 @@ def cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(func, int blockSize, si else: pfunc = int(CUfunction(func)) cyfunc = pfunc - cdef int numBlocks = 0 with nogil: err = cydriver.cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(&numBlocks, cyfunc, blockSize, dynamicSMemSize, flags) if err != cydriver.CUDA_SUCCESS: @@ -49062,14 +49505,9 @@ def cuOccupancyMaxPotentialBlockSize(func, blockSizeToDynamicSMemSize, size_t dy :py:obj:`~.cudaOccupancyMaxPotentialBlockSize` """ cdef cydriver.CUoccupancyB2DSize cyblockSizeToDynamicSMemSize - if blockSizeToDynamicSMemSize is None: - pblockSizeToDynamicSMemSize = 0 - elif isinstance(blockSizeToDynamicSMemSize, (CUoccupancyB2DSize,)): - pblockSizeToDynamicSMemSize = int(blockSizeToDynamicSMemSize) - else: - pblockSizeToDynamicSMemSize = int(CUoccupancyB2DSize(blockSizeToDynamicSMemSize)) - cyblockSizeToDynamicSMemSize = pblockSizeToDynamicSMemSize cdef cydriver.CUfunction cyfunc + cdef int blockSize = 0 + cdef int minGridSize = 0 if func is None: pfunc = 0 elif isinstance(func, (CUfunction,)): @@ -49077,8 +49515,13 @@ def cuOccupancyMaxPotentialBlockSize(func, blockSizeToDynamicSMemSize, size_t dy else: pfunc = int(CUfunction(func)) cyfunc = pfunc - cdef int minGridSize = 0 - cdef int blockSize = 0 + if blockSizeToDynamicSMemSize is None: + pblockSizeToDynamicSMemSize = 0 + elif isinstance(blockSizeToDynamicSMemSize, (CUoccupancyB2DSize,)): + pblockSizeToDynamicSMemSize = int(blockSizeToDynamicSMemSize) + else: + pblockSizeToDynamicSMemSize = int(CUoccupancyB2DSize(blockSizeToDynamicSMemSize)) + cyblockSizeToDynamicSMemSize = pblockSizeToDynamicSMemSize with nogil: err = cydriver.cuOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize, cyfunc, cyblockSizeToDynamicSMemSize, dynamicSMemSize, blockSizeLimit) if err != cydriver.CUDA_SUCCESS: @@ -49148,14 +49591,9 @@ def cuOccupancyMaxPotentialBlockSizeWithFlags(func, blockSizeToDynamicSMemSize, :py:obj:`~.cudaOccupancyMaxPotentialBlockSizeWithFlags` """ cdef cydriver.CUoccupancyB2DSize cyblockSizeToDynamicSMemSize - if blockSizeToDynamicSMemSize is None: - pblockSizeToDynamicSMemSize = 0 - elif isinstance(blockSizeToDynamicSMemSize, (CUoccupancyB2DSize,)): - pblockSizeToDynamicSMemSize = int(blockSizeToDynamicSMemSize) - else: - pblockSizeToDynamicSMemSize = int(CUoccupancyB2DSize(blockSizeToDynamicSMemSize)) - cyblockSizeToDynamicSMemSize = pblockSizeToDynamicSMemSize cdef cydriver.CUfunction cyfunc + cdef int blockSize = 0 + cdef int minGridSize = 0 if func is None: pfunc = 0 elif isinstance(func, (CUfunction,)): @@ -49163,8 +49601,13 @@ def cuOccupancyMaxPotentialBlockSizeWithFlags(func, blockSizeToDynamicSMemSize, else: pfunc = int(CUfunction(func)) cyfunc = pfunc - cdef int minGridSize = 0 - cdef int blockSize = 0 + if blockSizeToDynamicSMemSize is None: + pblockSizeToDynamicSMemSize = 0 + elif isinstance(blockSizeToDynamicSMemSize, (CUoccupancyB2DSize,)): + pblockSizeToDynamicSMemSize = int(blockSizeToDynamicSMemSize) + else: + pblockSizeToDynamicSMemSize = int(CUoccupancyB2DSize(blockSizeToDynamicSMemSize)) + cyblockSizeToDynamicSMemSize = pblockSizeToDynamicSMemSize with nogil: err = cydriver.cuOccupancyMaxPotentialBlockSizeWithFlags(&minGridSize, &blockSize, cyfunc, cyblockSizeToDynamicSMemSize, dynamicSMemSize, blockSizeLimit, flags) if err != cydriver.CUDA_SUCCESS: @@ -49204,6 +49647,7 @@ def cuOccupancyAvailableDynamicSMemPerBlock(func, int numBlocks, int blockSize): Returned maximum dynamic shared memory """ cdef cydriver.CUfunction cyfunc + cdef size_t dynamicSmemSize = 0 if func is None: pfunc = 0 elif isinstance(func, (CUfunction,)): @@ -49211,7 +49655,6 @@ def cuOccupancyAvailableDynamicSMemPerBlock(func, int numBlocks, int blockSize): else: pfunc = int(CUfunction(func)) cyfunc = pfunc - cdef size_t dynamicSmemSize = 0 with nogil: err = cydriver.cuOccupancyAvailableDynamicSMemPerBlock(&dynamicSmemSize, cyfunc, numBlocks, blockSize) if err != cydriver.CUDA_SUCCESS: @@ -49262,7 +49705,9 @@ def cuOccupancyMaxPotentialClusterSize(func, config : Optional[CUlaunchConfig]): -------- :py:obj:`~.cudaFuncGetAttributes`, :py:obj:`~.cuFuncGetAttribute` """ + cdef cydriver.CUlaunchConfig* cyconfig_ptr cdef cydriver.CUfunction cyfunc + cdef int clusterSize = 0 if func is None: pfunc = 0 elif isinstance(func, (CUfunction,)): @@ -49270,8 +49715,7 @@ def cuOccupancyMaxPotentialClusterSize(func, config : Optional[CUlaunchConfig]): else: pfunc = int(CUfunction(func)) cyfunc = pfunc - cdef int clusterSize = 0 - cdef cydriver.CUlaunchConfig* cyconfig_ptr = config._pvt_ptr if config is not None else NULL + cyconfig_ptr = config._pvt_ptr if config is not None else NULL with nogil: err = cydriver.cuOccupancyMaxPotentialClusterSize(&clusterSize, cyfunc, cyconfig_ptr) if err != cydriver.CUDA_SUCCESS: @@ -49322,7 +49766,9 @@ def cuOccupancyMaxActiveClusters(func, config : Optional[CUlaunchConfig]): -------- :py:obj:`~.cudaFuncGetAttributes`, :py:obj:`~.cuFuncGetAttribute` """ + cdef cydriver.CUlaunchConfig* cyconfig_ptr cdef cydriver.CUfunction cyfunc + cdef int numClusters = 0 if func is None: pfunc = 0 elif isinstance(func, (CUfunction,)): @@ -49330,8 +49776,7 @@ def cuOccupancyMaxActiveClusters(func, config : Optional[CUlaunchConfig]): else: pfunc = int(CUfunction(func)) cyfunc = pfunc - cdef int numClusters = 0 - cdef cydriver.CUlaunchConfig* cyconfig_ptr = config._pvt_ptr if config is not None else NULL + cyconfig_ptr = config._pvt_ptr if config is not None else NULL with nogil: err = cydriver.cuOccupancyMaxActiveClusters(&numClusters, cyfunc, cyconfig_ptr) if err != cydriver.CUDA_SUCCESS: @@ -49372,13 +49817,6 @@ def cuTexRefSetArray(hTexRef, hArray, unsigned int Flags): :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat` """ cdef cydriver.CUarray cyhArray - if hArray is None: - phArray = 0 - elif isinstance(hArray, (CUarray,)): - phArray = int(hArray) - else: - phArray = int(CUarray(hArray)) - cyhArray = phArray cdef cydriver.CUtexref cyhTexRef if hTexRef is None: phTexRef = 0 @@ -49387,6 +49825,13 @@ def cuTexRefSetArray(hTexRef, hArray, unsigned int Flags): else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef + if hArray is None: + phArray = 0 + elif isinstance(hArray, (CUarray,)): + phArray = int(hArray) + else: + phArray = int(CUarray(hArray)) + cyhArray = phArray with nogil: err = cydriver.cuTexRefSetArray(cyhTexRef, cyhArray, Flags) return (_CUresult(err),) @@ -49425,13 +49870,6 @@ def cuTexRefSetMipmappedArray(hTexRef, hMipmappedArray, unsigned int Flags): :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat` """ cdef cydriver.CUmipmappedArray cyhMipmappedArray - if hMipmappedArray is None: - phMipmappedArray = 0 - elif isinstance(hMipmappedArray, (CUmipmappedArray,)): - phMipmappedArray = int(hMipmappedArray) - else: - phMipmappedArray = int(CUmipmappedArray(hMipmappedArray)) - cyhMipmappedArray = phMipmappedArray cdef cydriver.CUtexref cyhTexRef if hTexRef is None: phTexRef = 0 @@ -49440,6 +49878,13 @@ def cuTexRefSetMipmappedArray(hTexRef, hMipmappedArray, unsigned int Flags): else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef + if hMipmappedArray is None: + phMipmappedArray = 0 + elif isinstance(hMipmappedArray, (CUmipmappedArray,)): + phMipmappedArray = int(hMipmappedArray) + else: + phMipmappedArray = int(CUmipmappedArray(hMipmappedArray)) + cyhMipmappedArray = phMipmappedArray with nogil: err = cydriver.cuTexRefSetMipmappedArray(cyhTexRef, cyhMipmappedArray, Flags) return (_CUresult(err),) @@ -49497,14 +49942,8 @@ def cuTexRefSetAddress(hTexRef, dptr, size_t numbytes): :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat` """ cdef cydriver.CUdeviceptr cydptr - if dptr is None: - pdptr = 0 - elif isinstance(dptr, (CUdeviceptr,)): - pdptr = int(dptr) - else: - pdptr = int(CUdeviceptr(dptr)) - cydptr = pdptr cdef cydriver.CUtexref cyhTexRef + cdef size_t ByteOffset = 0 if hTexRef is None: phTexRef = 0 elif isinstance(hTexRef, (CUtexref,)): @@ -49512,7 +49951,13 @@ def cuTexRefSetAddress(hTexRef, dptr, size_t numbytes): else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - cdef size_t ByteOffset = 0 + if dptr is None: + pdptr = 0 + elif isinstance(dptr, (CUdeviceptr,)): + pdptr = int(dptr) + else: + pdptr = int(CUdeviceptr(dptr)) + cydptr = pdptr with nogil: err = cydriver.cuTexRefSetAddress(&ByteOffset, cyhTexRef, cydptr, numbytes) if err != cydriver.CUDA_SUCCESS: @@ -49580,13 +50025,7 @@ def cuTexRefSetAddress2D(hTexRef, desc : Optional[CUDA_ARRAY_DESCRIPTOR], dptr, :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat` """ cdef cydriver.CUdeviceptr cydptr - if dptr is None: - pdptr = 0 - elif isinstance(dptr, (CUdeviceptr,)): - pdptr = int(dptr) - else: - pdptr = int(CUdeviceptr(dptr)) - cydptr = pdptr + cdef cydriver.CUDA_ARRAY_DESCRIPTOR* cydesc_ptr cdef cydriver.CUtexref cyhTexRef if hTexRef is None: phTexRef = 0 @@ -49595,7 +50034,14 @@ def cuTexRefSetAddress2D(hTexRef, desc : Optional[CUDA_ARRAY_DESCRIPTOR], dptr, else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - cdef cydriver.CUDA_ARRAY_DESCRIPTOR* cydesc_ptr = desc._pvt_ptr if desc is not None else NULL + cydesc_ptr = desc._pvt_ptr if desc is not None else NULL + if dptr is None: + pdptr = 0 + elif isinstance(dptr, (CUdeviceptr,)): + pdptr = int(dptr) + else: + pdptr = int(CUdeviceptr(dptr)) + cydptr = pdptr with nogil: err = cydriver.cuTexRefSetAddress2D(cyhTexRef, cydesc_ptr, cydptr, Pitch) return (_CUresult(err),) @@ -49633,6 +50079,7 @@ def cuTexRefSetFormat(hTexRef, fmt not None : CUarray_format, int NumPackedCompo -------- :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat`, :py:obj:`~.cudaCreateChannelDesc` """ + cdef cydriver.CUarray_format cyfmt cdef cydriver.CUtexref cyhTexRef if hTexRef is None: phTexRef = 0 @@ -49641,7 +50088,7 @@ def cuTexRefSetFormat(hTexRef, fmt not None : CUarray_format, int NumPackedCompo else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - cdef cydriver.CUarray_format cyfmt = int(fmt) + cyfmt = int(fmt) with nogil: err = cydriver.cuTexRefSetFormat(cyhTexRef, cyfmt, NumPackedComponents) return (_CUresult(err),) @@ -49686,6 +50133,7 @@ def cuTexRefSetAddressMode(hTexRef, int dim, am not None : CUaddress_mode): -------- :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat` """ + cdef cydriver.CUaddress_mode cyam cdef cydriver.CUtexref cyhTexRef if hTexRef is None: phTexRef = 0 @@ -49694,7 +50142,7 @@ def cuTexRefSetAddressMode(hTexRef, int dim, am not None : CUaddress_mode): else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - cdef cydriver.CUaddress_mode cyam = int(am) + cyam = int(am) with nogil: err = cydriver.cuTexRefSetAddressMode(cyhTexRef, dim, cyam) return (_CUresult(err),) @@ -49733,6 +50181,7 @@ def cuTexRefSetFilterMode(hTexRef, fm not None : CUfilter_mode): -------- :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat` """ + cdef cydriver.CUfilter_mode cyfm cdef cydriver.CUtexref cyhTexRef if hTexRef is None: phTexRef = 0 @@ -49741,7 +50190,7 @@ def cuTexRefSetFilterMode(hTexRef, fm not None : CUfilter_mode): else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - cdef cydriver.CUfilter_mode cyfm = int(fm) + cyfm = int(fm) with nogil: err = cydriver.cuTexRefSetFilterMode(cyhTexRef, cyfm) return (_CUresult(err),) @@ -49780,6 +50229,7 @@ def cuTexRefSetMipmapFilterMode(hTexRef, fm not None : CUfilter_mode): -------- :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat` """ + cdef cydriver.CUfilter_mode cyfm cdef cydriver.CUtexref cyhTexRef if hTexRef is None: phTexRef = 0 @@ -49788,7 +50238,7 @@ def cuTexRefSetMipmapFilterMode(hTexRef, fm not None : CUfilter_mode): else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - cdef cydriver.CUfilter_mode cyfm = int(fm) + cyfm = int(fm) with nogil: err = cydriver.cuTexRefSetMipmapFilterMode(cyhTexRef, cyfm) return (_CUresult(err),) @@ -50062,6 +50512,8 @@ def cuTexRefGetAddress(hTexRef): :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat` """ cdef cydriver.CUtexref cyhTexRef + cdef CUdeviceptr pdptr + pdptr = CUdeviceptr() if hTexRef is None: phTexRef = 0 elif isinstance(hTexRef, (CUtexref,)): @@ -50069,7 +50521,6 @@ def cuTexRefGetAddress(hTexRef): else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - cdef CUdeviceptr pdptr = CUdeviceptr() with nogil: err = cydriver.cuTexRefGetAddress(pdptr._pvt_ptr, cyhTexRef) if err != cydriver.CUDA_SUCCESS: @@ -50106,6 +50557,8 @@ def cuTexRefGetArray(hTexRef): :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat` """ cdef cydriver.CUtexref cyhTexRef + cdef CUarray phArray + phArray = CUarray() if hTexRef is None: phTexRef = 0 elif isinstance(hTexRef, (CUtexref,)): @@ -50113,7 +50566,6 @@ def cuTexRefGetArray(hTexRef): else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - cdef CUarray phArray = CUarray() with nogil: err = cydriver.cuTexRefGetArray(phArray._pvt_ptr, cyhTexRef) if err != cydriver.CUDA_SUCCESS: @@ -50151,6 +50603,8 @@ def cuTexRefGetMipmappedArray(hTexRef): :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat` """ cdef cydriver.CUtexref cyhTexRef + cdef CUmipmappedArray phMipmappedArray + phMipmappedArray = CUmipmappedArray() if hTexRef is None: phTexRef = 0 elif isinstance(hTexRef, (CUtexref,)): @@ -50158,7 +50612,6 @@ def cuTexRefGetMipmappedArray(hTexRef): else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - cdef CUmipmappedArray phMipmappedArray = CUmipmappedArray() with nogil: err = cydriver.cuTexRefGetMipmappedArray(phMipmappedArray._pvt_ptr, cyhTexRef) if err != cydriver.CUDA_SUCCESS: @@ -50197,6 +50650,7 @@ def cuTexRefGetAddressMode(hTexRef, int dim): :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat` """ cdef cydriver.CUtexref cyhTexRef + cdef cydriver.CUaddress_mode pam if hTexRef is None: phTexRef = 0 elif isinstance(hTexRef, (CUtexref,)): @@ -50204,7 +50658,6 @@ def cuTexRefGetAddressMode(hTexRef, int dim): else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - cdef cydriver.CUaddress_mode pam with nogil: err = cydriver.cuTexRefGetAddressMode(&pam, cyhTexRef, dim) if err != cydriver.CUDA_SUCCESS: @@ -50240,6 +50693,7 @@ def cuTexRefGetFilterMode(hTexRef): :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat` """ cdef cydriver.CUtexref cyhTexRef + cdef cydriver.CUfilter_mode pfm if hTexRef is None: phTexRef = 0 elif isinstance(hTexRef, (CUtexref,)): @@ -50247,7 +50701,6 @@ def cuTexRefGetFilterMode(hTexRef): else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - cdef cydriver.CUfilter_mode pfm with nogil: err = cydriver.cuTexRefGetFilterMode(&pfm, cyhTexRef) if err != cydriver.CUDA_SUCCESS: @@ -50286,6 +50739,8 @@ def cuTexRefGetFormat(hTexRef): :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags` """ cdef cydriver.CUtexref cyhTexRef + cdef int pNumChannels = 0 + cdef cydriver.CUarray_format pFormat if hTexRef is None: phTexRef = 0 elif isinstance(hTexRef, (CUtexref,)): @@ -50293,8 +50748,6 @@ def cuTexRefGetFormat(hTexRef): else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - cdef cydriver.CUarray_format pFormat - cdef int pNumChannels = 0 with nogil: err = cydriver.cuTexRefGetFormat(&pFormat, &pNumChannels, cyhTexRef) if err != cydriver.CUDA_SUCCESS: @@ -50330,6 +50783,7 @@ def cuTexRefGetMipmapFilterMode(hTexRef): :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat` """ cdef cydriver.CUtexref cyhTexRef + cdef cydriver.CUfilter_mode pfm if hTexRef is None: phTexRef = 0 elif isinstance(hTexRef, (CUtexref,)): @@ -50337,7 +50791,6 @@ def cuTexRefGetMipmapFilterMode(hTexRef): else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - cdef cydriver.CUfilter_mode pfm with nogil: err = cydriver.cuTexRefGetMipmapFilterMode(&pfm, cyhTexRef) if err != cydriver.CUDA_SUCCESS: @@ -50374,6 +50827,7 @@ def cuTexRefGetMipmapLevelBias(hTexRef): :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat` """ cdef cydriver.CUtexref cyhTexRef + cdef float pbias = 0 if hTexRef is None: phTexRef = 0 elif isinstance(hTexRef, (CUtexref,)): @@ -50381,7 +50835,6 @@ def cuTexRefGetMipmapLevelBias(hTexRef): else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - cdef float pbias = 0 with nogil: err = cydriver.cuTexRefGetMipmapLevelBias(&pbias, cyhTexRef) if err != cydriver.CUDA_SUCCESS: @@ -50420,6 +50873,8 @@ def cuTexRefGetMipmapLevelClamp(hTexRef): :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat` """ cdef cydriver.CUtexref cyhTexRef + cdef float pmaxMipmapLevelClamp = 0 + cdef float pminMipmapLevelClamp = 0 if hTexRef is None: phTexRef = 0 elif isinstance(hTexRef, (CUtexref,)): @@ -50427,8 +50882,6 @@ def cuTexRefGetMipmapLevelClamp(hTexRef): else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - cdef float pminMipmapLevelClamp = 0 - cdef float pmaxMipmapLevelClamp = 0 with nogil: err = cydriver.cuTexRefGetMipmapLevelClamp(&pminMipmapLevelClamp, &pmaxMipmapLevelClamp, cyhTexRef) if err != cydriver.CUDA_SUCCESS: @@ -50464,6 +50917,7 @@ def cuTexRefGetMaxAnisotropy(hTexRef): :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFlags`, :py:obj:`~.cuTexRefGetFormat` """ cdef cydriver.CUtexref cyhTexRef + cdef int pmaxAniso = 0 if hTexRef is None: phTexRef = 0 elif isinstance(hTexRef, (CUtexref,)): @@ -50471,7 +50925,6 @@ def cuTexRefGetMaxAnisotropy(hTexRef): else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - cdef int pmaxAniso = 0 with nogil: err = cydriver.cuTexRefGetMaxAnisotropy(&pmaxAniso, cyhTexRef) if err != cydriver.CUDA_SUCCESS: @@ -50510,6 +50963,7 @@ def cuTexRefGetBorderColor(hTexRef): :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetBorderColor` """ cdef cydriver.CUtexref cyhTexRef + cdef float pBorderColor = 0 if hTexRef is None: phTexRef = 0 elif isinstance(hTexRef, (CUtexref,)): @@ -50517,7 +50971,6 @@ def cuTexRefGetBorderColor(hTexRef): else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - cdef float pBorderColor = 0 with nogil: err = cydriver.cuTexRefGetBorderColor(&pBorderColor, cyhTexRef) if err != cydriver.CUDA_SUCCESS: @@ -50552,6 +51005,7 @@ def cuTexRefGetFlags(hTexRef): :py:obj:`~.cuTexRefSetAddress`, :py:obj:`~.cuTexRefSetAddress2D`, :py:obj:`~.cuTexRefSetAddressMode`, :py:obj:`~.cuTexRefSetArray`, :py:obj:`~.cuTexRefSetFilterMode`, :py:obj:`~.cuTexRefSetFlags`, :py:obj:`~.cuTexRefSetFormat`, :py:obj:`~.cuTexRefGetAddress`, :py:obj:`~.cuTexRefGetAddressMode`, :py:obj:`~.cuTexRefGetArray`, :py:obj:`~.cuTexRefGetFilterMode`, :py:obj:`~.cuTexRefGetFormat` """ cdef cydriver.CUtexref cyhTexRef + cdef unsigned int pFlags = 0 if hTexRef is None: phTexRef = 0 elif isinstance(hTexRef, (CUtexref,)): @@ -50559,7 +51013,6 @@ def cuTexRefGetFlags(hTexRef): else: phTexRef = int(CUtexref(hTexRef)) cyhTexRef = phTexRef - cdef unsigned int pFlags = 0 with nogil: err = cydriver.cuTexRefGetFlags(&pFlags, cyhTexRef) if err != cydriver.CUDA_SUCCESS: @@ -50593,7 +51046,8 @@ def cuTexRefCreate(): -------- :py:obj:`~.cuTexRefDestroy` """ - cdef CUtexref pTexRef = CUtexref() + cdef CUtexref pTexRef + pTexRef = CUtexref() with nogil: err = cydriver.cuTexRefCreate(pTexRef._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -50672,13 +51126,6 @@ def cuSurfRefSetArray(hSurfRef, hArray, unsigned int Flags): :py:obj:`~.cuModuleGetSurfRef`, :py:obj:`~.cuSurfRefGetArray` """ cdef cydriver.CUarray cyhArray - if hArray is None: - phArray = 0 - elif isinstance(hArray, (CUarray,)): - phArray = int(hArray) - else: - phArray = int(CUarray(hArray)) - cyhArray = phArray cdef cydriver.CUsurfref cyhSurfRef if hSurfRef is None: phSurfRef = 0 @@ -50687,6 +51134,13 @@ def cuSurfRefSetArray(hSurfRef, hArray, unsigned int Flags): else: phSurfRef = int(CUsurfref(hSurfRef)) cyhSurfRef = phSurfRef + if hArray is None: + phArray = 0 + elif isinstance(hArray, (CUarray,)): + phArray = int(hArray) + else: + phArray = int(CUarray(hArray)) + cyhArray = phArray with nogil: err = cydriver.cuSurfRefSetArray(cyhSurfRef, cyhArray, Flags) return (_CUresult(err),) @@ -50721,6 +51175,8 @@ def cuSurfRefGetArray(hSurfRef): :py:obj:`~.cuModuleGetSurfRef`, :py:obj:`~.cuSurfRefSetArray` """ cdef cydriver.CUsurfref cyhSurfRef + cdef CUarray phArray + phArray = CUarray() if hSurfRef is None: phSurfRef = 0 elif isinstance(hSurfRef, (CUsurfref,)): @@ -50728,7 +51184,6 @@ def cuSurfRefGetArray(hSurfRef): else: phSurfRef = int(CUsurfref(hSurfRef)) cyhSurfRef = phSurfRef - cdef CUarray phArray = CUarray() with nogil: err = cydriver.cuSurfRefGetArray(phArray._pvt_ptr, cyhSurfRef) if err != cydriver.CUDA_SUCCESS: @@ -50965,10 +51420,14 @@ def cuTexObjectCreate(pResDesc : Optional[CUDA_RESOURCE_DESC], pTexDesc : Option -------- :py:obj:`~.cuTexObjectDestroy`, :py:obj:`~.cudaCreateTextureObject` """ - cdef CUtexObject pTexObject = CUtexObject() - cdef cydriver.CUDA_RESOURCE_DESC* cypResDesc_ptr = pResDesc._pvt_ptr if pResDesc is not None else NULL - cdef cydriver.CUDA_TEXTURE_DESC* cypTexDesc_ptr = pTexDesc._pvt_ptr if pTexDesc is not None else NULL - cdef cydriver.CUDA_RESOURCE_VIEW_DESC* cypResViewDesc_ptr = pResViewDesc._pvt_ptr if pResViewDesc is not None else NULL + cdef cydriver.CUDA_RESOURCE_VIEW_DESC* cypResViewDesc_ptr + cdef cydriver.CUDA_TEXTURE_DESC* cypTexDesc_ptr + cdef cydriver.CUDA_RESOURCE_DESC* cypResDesc_ptr + cdef CUtexObject pTexObject + pTexObject = CUtexObject() + cypResDesc_ptr = pResDesc._pvt_ptr if pResDesc is not None else NULL + cypTexDesc_ptr = pTexDesc._pvt_ptr if pTexDesc is not None else NULL + cypResViewDesc_ptr = pResViewDesc._pvt_ptr if pResViewDesc is not None else NULL with nogil: err = cydriver.cuTexObjectCreate(pTexObject._pvt_ptr, cypResDesc_ptr, cypTexDesc_ptr, cypResViewDesc_ptr) if err != cydriver.CUDA_SUCCESS: @@ -51037,6 +51496,8 @@ def cuTexObjectGetResourceDesc(texObject): :py:obj:`~.cuTexObjectCreate`, :py:obj:`~.cudaGetTextureObjectResourceDesc`, """ cdef cydriver.CUtexObject cytexObject + cdef CUDA_RESOURCE_DESC pResDesc + pResDesc = CUDA_RESOURCE_DESC() if texObject is None: ptexObject = 0 elif isinstance(texObject, (CUtexObject,)): @@ -51044,7 +51505,6 @@ def cuTexObjectGetResourceDesc(texObject): else: ptexObject = int(CUtexObject(texObject)) cytexObject = ptexObject - cdef CUDA_RESOURCE_DESC pResDesc = CUDA_RESOURCE_DESC() with nogil: err = cydriver.cuTexObjectGetResourceDesc(pResDesc._pvt_ptr, cytexObject) if err != cydriver.CUDA_SUCCESS: @@ -51078,6 +51538,8 @@ def cuTexObjectGetTextureDesc(texObject): :py:obj:`~.cuTexObjectCreate`, :py:obj:`~.cudaGetTextureObjectTextureDesc` """ cdef cydriver.CUtexObject cytexObject + cdef CUDA_TEXTURE_DESC pTexDesc + pTexDesc = CUDA_TEXTURE_DESC() if texObject is None: ptexObject = 0 elif isinstance(texObject, (CUtexObject,)): @@ -51085,7 +51547,6 @@ def cuTexObjectGetTextureDesc(texObject): else: ptexObject = int(CUtexObject(texObject)) cytexObject = ptexObject - cdef CUDA_TEXTURE_DESC pTexDesc = CUDA_TEXTURE_DESC() with nogil: err = cydriver.cuTexObjectGetTextureDesc(pTexDesc._pvt_ptr, cytexObject) if err != cydriver.CUDA_SUCCESS: @@ -51120,6 +51581,8 @@ def cuTexObjectGetResourceViewDesc(texObject): :py:obj:`~.cuTexObjectCreate`, :py:obj:`~.cudaGetTextureObjectResourceViewDesc` """ cdef cydriver.CUtexObject cytexObject + cdef CUDA_RESOURCE_VIEW_DESC pResViewDesc + pResViewDesc = CUDA_RESOURCE_VIEW_DESC() if texObject is None: ptexObject = 0 elif isinstance(texObject, (CUtexObject,)): @@ -51127,7 +51590,6 @@ def cuTexObjectGetResourceViewDesc(texObject): else: ptexObject = int(CUtexObject(texObject)) cytexObject = ptexObject - cdef CUDA_RESOURCE_VIEW_DESC pResViewDesc = CUDA_RESOURCE_VIEW_DESC() with nogil: err = cydriver.cuTexObjectGetResourceViewDesc(pResViewDesc._pvt_ptr, cytexObject) if err != cydriver.CUDA_SUCCESS: @@ -51169,8 +51631,10 @@ def cuSurfObjectCreate(pResDesc : Optional[CUDA_RESOURCE_DESC]): -------- :py:obj:`~.cuSurfObjectDestroy`, :py:obj:`~.cudaCreateSurfaceObject` """ - cdef CUsurfObject pSurfObject = CUsurfObject() - cdef cydriver.CUDA_RESOURCE_DESC* cypResDesc_ptr = pResDesc._pvt_ptr if pResDesc is not None else NULL + cdef cydriver.CUDA_RESOURCE_DESC* cypResDesc_ptr + cdef CUsurfObject pSurfObject + pSurfObject = CUsurfObject() + cypResDesc_ptr = pResDesc._pvt_ptr if pResDesc is not None else NULL with nogil: err = cydriver.cuSurfObjectCreate(pSurfObject._pvt_ptr, cypResDesc_ptr) if err != cydriver.CUDA_SUCCESS: @@ -51239,6 +51703,8 @@ def cuSurfObjectGetResourceDesc(surfObject): :py:obj:`~.cuSurfObjectCreate`, :py:obj:`~.cudaGetSurfaceObjectResourceDesc` """ cdef cydriver.CUsurfObject cysurfObject + cdef CUDA_RESOURCE_DESC pResDesc + pResDesc = CUDA_RESOURCE_DESC() if surfObject is None: psurfObject = 0 elif isinstance(surfObject, (CUsurfObject,)): @@ -51246,7 +51712,6 @@ def cuSurfObjectGetResourceDesc(surfObject): else: psurfObject = int(CUsurfObject(surfObject)) cysurfObject = psurfObject - cdef CUDA_RESOURCE_DESC pResDesc = CUDA_RESOURCE_DESC() with nogil: err = cydriver.cuSurfObjectGetResourceDesc(pResDesc._pvt_ptr, cysurfObject) if err != cydriver.CUDA_SUCCESS: @@ -51482,81 +51947,90 @@ def cuTensorMapEncodeTiled(tensorDataType not None : CUtensorMapDataType, tensor -------- :py:obj:`~.cuTensorMapEncodeIm2col`, :py:obj:`~.cuTensorMapEncodeIm2colWide`, :py:obj:`~.cuTensorMapReplaceAddress` """ + cdef cydriver.CUtensorMapFloatOOBfill cyoobFill + cdef cydriver.CUtensorMapL2promotion cyl2Promotion + cdef cydriver.CUtensorMapSwizzle cyswizzle + cdef cydriver.CUtensorMapInterleave cyinterleave cdef cydriver.cuuint32_t* cyelementStrides cdef size_t elementStridesLen cdef cydriver.cuuint32_t[5] elementStridesStatic - elementStridesLen = 0 if elementStrides is None else len(elementStrides) - if elementStridesLen == 0: - cyelementStrides = NULL - elif elementStridesLen == 1: - cyelementStrides = ( elementStrides[0])._pvt_ptr - elif elementStridesLen <= 5: - for idx in range(elementStridesLen): - elementStridesStatic[idx] = ( elementStrides[idx])._pvt_ptr[0] - cyelementStrides = elementStridesStatic - else: - raise ValueError("Argument 'elementStrides' too long, must be <= 5") cdef cydriver.cuuint32_t* cyboxDim cdef size_t boxDimLen cdef cydriver.cuuint32_t[5] boxDimStatic - boxDimLen = 0 if boxDim is None else len(boxDim) - if boxDimLen == 0: - cyboxDim = NULL - elif boxDimLen == 1: - cyboxDim = ( boxDim[0])._pvt_ptr - elif boxDimLen <= 5: - for idx in range(boxDimLen): - boxDimStatic[idx] = ( boxDim[idx])._pvt_ptr[0] - cyboxDim = boxDimStatic - else: - raise ValueError("Argument 'boxDim' too long, must be <= 5") cdef cydriver.cuuint64_t* cyglobalStrides cdef size_t globalStridesLen cdef cydriver.cuuint64_t[5] globalStridesStatic - globalStridesLen = 0 if globalStrides is None else len(globalStrides) - if globalStridesLen == 0: - cyglobalStrides = NULL - elif globalStridesLen == 1: - cyglobalStrides = ( globalStrides[0])._pvt_ptr - elif globalStridesLen <= 5: - for idx in range(globalStridesLen): - globalStridesStatic[idx] = ( globalStrides[idx])._pvt_ptr[0] - cyglobalStrides = globalStridesStatic - else: - raise ValueError("Argument 'globalStrides' too long, must be <= 5") cdef cydriver.cuuint64_t* cyglobalDim cdef size_t globalDimLen cdef cydriver.cuuint64_t[5] globalDimStatic - globalDimLen = 0 if globalDim is None else len(globalDim) - if globalDimLen == 0: - cyglobalDim = NULL - elif globalDimLen == 1: - cyglobalDim = ( globalDim[0])._pvt_ptr - elif globalDimLen <= 5: - for idx in range(globalDimLen): - globalDimStatic[idx] = ( globalDim[idx])._pvt_ptr[0] - cyglobalDim = globalDimStatic - else: - raise ValueError("Argument 'globalDim' too long, must be <= 5") - cdef cydriver.cuuint32_t cytensorRank - if tensorRank is None: - ptensorRank = 0 - elif isinstance(tensorRank, (cuuint32_t,)): - ptensorRank = int(tensorRank) - else: - ptensorRank = int(cuuint32_t(tensorRank)) - cytensorRank = ptensorRank - cdef CUtensorMap tensorMap = CUtensorMap() - cdef cydriver.CUtensorMapDataType cytensorDataType = int(tensorDataType) cdef _HelperInputVoidPtrStruct cyglobalAddressHelper - cdef void* cyglobalAddress = _helper_input_void_ptr(globalAddress, &cyglobalAddressHelper) - cdef cydriver.CUtensorMapInterleave cyinterleave = int(interleave) - cdef cydriver.CUtensorMapSwizzle cyswizzle = int(swizzle) - cdef cydriver.CUtensorMapL2promotion cyl2Promotion = int(l2Promotion) - cdef cydriver.CUtensorMapFloatOOBfill cyoobFill = int(oobFill) - with nogil: - err = cydriver.cuTensorMapEncodeTiled(tensorMap._pvt_ptr, cytensorDataType, cytensorRank, cyglobalAddress, cyglobalDim, cyglobalStrides, cyboxDim, cyelementStrides, cyinterleave, cyswizzle, cyl2Promotion, cyoobFill) - _helper_input_void_ptr_free(&cyglobalAddressHelper) + cdef void* cyglobalAddress + cdef cydriver.cuuint32_t cytensorRank + cdef cydriver.CUtensorMapDataType cytensorDataType + cdef CUtensorMap tensorMap + try: + tensorMap = CUtensorMap() + cytensorDataType = int(tensorDataType) + if tensorRank is None: + ptensorRank = 0 + elif isinstance(tensorRank, (cuuint32_t,)): + ptensorRank = int(tensorRank) + else: + ptensorRank = int(cuuint32_t(tensorRank)) + cytensorRank = ptensorRank + cyglobalAddress = _helper_input_void_ptr(globalAddress, &cyglobalAddressHelper) + globalDimLen = 0 if globalDim is None else len(globalDim) + if globalDimLen == 0: + cyglobalDim = NULL + elif globalDimLen == 1: + cyglobalDim = ( globalDim[0])._pvt_ptr + elif globalDimLen <= 5: + for idx in range(globalDimLen): + globalDimStatic[idx] = ( globalDim[idx])._pvt_ptr[0] + cyglobalDim = globalDimStatic + else: + raise ValueError("Argument 'globalDim' too long, must be <= 5") + globalStridesLen = 0 if globalStrides is None else len(globalStrides) + if globalStridesLen == 0: + cyglobalStrides = NULL + elif globalStridesLen == 1: + cyglobalStrides = ( globalStrides[0])._pvt_ptr + elif globalStridesLen <= 5: + for idx in range(globalStridesLen): + globalStridesStatic[idx] = ( globalStrides[idx])._pvt_ptr[0] + cyglobalStrides = globalStridesStatic + else: + raise ValueError("Argument 'globalStrides' too long, must be <= 5") + boxDimLen = 0 if boxDim is None else len(boxDim) + if boxDimLen == 0: + cyboxDim = NULL + elif boxDimLen == 1: + cyboxDim = ( boxDim[0])._pvt_ptr + elif boxDimLen <= 5: + for idx in range(boxDimLen): + boxDimStatic[idx] = ( boxDim[idx])._pvt_ptr[0] + cyboxDim = boxDimStatic + else: + raise ValueError("Argument 'boxDim' too long, must be <= 5") + elementStridesLen = 0 if elementStrides is None else len(elementStrides) + if elementStridesLen == 0: + cyelementStrides = NULL + elif elementStridesLen == 1: + cyelementStrides = ( elementStrides[0])._pvt_ptr + elif elementStridesLen <= 5: + for idx in range(elementStridesLen): + elementStridesStatic[idx] = ( elementStrides[idx])._pvt_ptr[0] + cyelementStrides = elementStridesStatic + else: + raise ValueError("Argument 'elementStrides' too long, must be <= 5") + cyinterleave = int(interleave) + cyswizzle = int(swizzle) + cyl2Promotion = int(l2Promotion) + cyoobFill = int(oobFill) + with nogil: + err = cydriver.cuTensorMapEncodeTiled(tensorMap._pvt_ptr, cytensorDataType, cytensorRank, cyglobalAddress, cyglobalDim, cyglobalStrides, cyboxDim, cyelementStrides, cyinterleave, cyswizzle, cyl2Promotion, cyoobFill) + finally: + _helper_input_void_ptr_free(&cyglobalAddressHelper) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, tensorMap) @@ -51817,91 +52291,102 @@ def cuTensorMapEncodeIm2col(tensorDataType not None : CUtensorMapDataType, tenso -------- :py:obj:`~.cuTensorMapEncodeTiled`, :py:obj:`~.cuTensorMapEncodeIm2colWide`, :py:obj:`~.cuTensorMapReplaceAddress` """ + cdef cydriver.CUtensorMapFloatOOBfill cyoobFill + cdef cydriver.CUtensorMapL2promotion cyl2Promotion + cdef cydriver.CUtensorMapSwizzle cyswizzle + cdef cydriver.CUtensorMapInterleave cyinterleave cdef cydriver.cuuint32_t* cyelementStrides cdef size_t elementStridesLen cdef cydriver.cuuint32_t[5] elementStridesStatic - elementStridesLen = 0 if elementStrides is None else len(elementStrides) - if elementStridesLen == 0: - cyelementStrides = NULL - elif elementStridesLen == 1: - cyelementStrides = ( elementStrides[0])._pvt_ptr - elif elementStridesLen <= 5: - for idx in range(elementStridesLen): - elementStridesStatic[idx] = ( elementStrides[idx])._pvt_ptr[0] - cyelementStrides = elementStridesStatic - else: - raise ValueError("Argument 'elementStrides' too long, must be <= 5") cdef cydriver.cuuint32_t cypixelsPerColumn - if pixelsPerColumn is None: - ppixelsPerColumn = 0 - elif isinstance(pixelsPerColumn, (cuuint32_t,)): - ppixelsPerColumn = int(pixelsPerColumn) - else: - ppixelsPerColumn = int(cuuint32_t(pixelsPerColumn)) - cypixelsPerColumn = ppixelsPerColumn cdef cydriver.cuuint32_t cychannelsPerPixel - if channelsPerPixel is None: - pchannelsPerPixel = 0 - elif isinstance(channelsPerPixel, (cuuint32_t,)): - pchannelsPerPixel = int(channelsPerPixel) - else: - pchannelsPerPixel = int(cuuint32_t(channelsPerPixel)) - cychannelsPerPixel = pchannelsPerPixel - pixelBoxUpperCorner = [] if pixelBoxUpperCorner is None else pixelBoxUpperCorner - if not all(isinstance(_x, (int)) for _x in pixelBoxUpperCorner): - raise TypeError("Argument 'pixelBoxUpperCorner' is not instance of type (expected tuple[int] or list[int]") - pixelBoxLowerCorner = [] if pixelBoxLowerCorner is None else pixelBoxLowerCorner - if not all(isinstance(_x, (int)) for _x in pixelBoxLowerCorner): - raise TypeError("Argument 'pixelBoxLowerCorner' is not instance of type (expected tuple[int] or list[int]") + cdef vector[int] cypixelBoxUpperCorner + cdef vector[int] cypixelBoxLowerCorner cdef cydriver.cuuint64_t* cyglobalStrides cdef size_t globalStridesLen cdef cydriver.cuuint64_t[5] globalStridesStatic - globalStridesLen = 0 if globalStrides is None else len(globalStrides) - if globalStridesLen == 0: - cyglobalStrides = NULL - elif globalStridesLen == 1: - cyglobalStrides = ( globalStrides[0])._pvt_ptr - elif globalStridesLen <= 5: - for idx in range(globalStridesLen): - globalStridesStatic[idx] = ( globalStrides[idx])._pvt_ptr[0] - cyglobalStrides = globalStridesStatic - else: - raise ValueError("Argument 'globalStrides' too long, must be <= 5") cdef cydriver.cuuint64_t* cyglobalDim cdef size_t globalDimLen cdef cydriver.cuuint64_t[5] globalDimStatic - globalDimLen = 0 if globalDim is None else len(globalDim) - if globalDimLen == 0: - cyglobalDim = NULL - elif globalDimLen == 1: - cyglobalDim = ( globalDim[0])._pvt_ptr - elif globalDimLen <= 5: - for idx in range(globalDimLen): - globalDimStatic[idx] = ( globalDim[idx])._pvt_ptr[0] - cyglobalDim = globalDimStatic - else: - raise ValueError("Argument 'globalDim' too long, must be <= 5") - cdef cydriver.cuuint32_t cytensorRank - if tensorRank is None: - ptensorRank = 0 - elif isinstance(tensorRank, (cuuint32_t,)): - ptensorRank = int(tensorRank) - else: - ptensorRank = int(cuuint32_t(tensorRank)) - cytensorRank = ptensorRank - cdef CUtensorMap tensorMap = CUtensorMap() - cdef cydriver.CUtensorMapDataType cytensorDataType = int(tensorDataType) cdef _HelperInputVoidPtrStruct cyglobalAddressHelper - cdef void* cyglobalAddress = _helper_input_void_ptr(globalAddress, &cyglobalAddressHelper) - cdef vector[int] cypixelBoxLowerCorner = pixelBoxLowerCorner - cdef vector[int] cypixelBoxUpperCorner = pixelBoxUpperCorner - cdef cydriver.CUtensorMapInterleave cyinterleave = int(interleave) - cdef cydriver.CUtensorMapSwizzle cyswizzle = int(swizzle) - cdef cydriver.CUtensorMapL2promotion cyl2Promotion = int(l2Promotion) - cdef cydriver.CUtensorMapFloatOOBfill cyoobFill = int(oobFill) - with nogil: - err = cydriver.cuTensorMapEncodeIm2col(tensorMap._pvt_ptr, cytensorDataType, cytensorRank, cyglobalAddress, cyglobalDim, cyglobalStrides, cypixelBoxLowerCorner.data(), cypixelBoxUpperCorner.data(), cychannelsPerPixel, cypixelsPerColumn, cyelementStrides, cyinterleave, cyswizzle, cyl2Promotion, cyoobFill) - _helper_input_void_ptr_free(&cyglobalAddressHelper) + cdef void* cyglobalAddress + cdef cydriver.cuuint32_t cytensorRank + cdef cydriver.CUtensorMapDataType cytensorDataType + cdef CUtensorMap tensorMap + try: + tensorMap = CUtensorMap() + cytensorDataType = int(tensorDataType) + if tensorRank is None: + ptensorRank = 0 + elif isinstance(tensorRank, (cuuint32_t,)): + ptensorRank = int(tensorRank) + else: + ptensorRank = int(cuuint32_t(tensorRank)) + cytensorRank = ptensorRank + cyglobalAddress = _helper_input_void_ptr(globalAddress, &cyglobalAddressHelper) + globalDimLen = 0 if globalDim is None else len(globalDim) + if globalDimLen == 0: + cyglobalDim = NULL + elif globalDimLen == 1: + cyglobalDim = ( globalDim[0])._pvt_ptr + elif globalDimLen <= 5: + for idx in range(globalDimLen): + globalDimStatic[idx] = ( globalDim[idx])._pvt_ptr[0] + cyglobalDim = globalDimStatic + else: + raise ValueError("Argument 'globalDim' too long, must be <= 5") + globalStridesLen = 0 if globalStrides is None else len(globalStrides) + if globalStridesLen == 0: + cyglobalStrides = NULL + elif globalStridesLen == 1: + cyglobalStrides = ( globalStrides[0])._pvt_ptr + elif globalStridesLen <= 5: + for idx in range(globalStridesLen): + globalStridesStatic[idx] = ( globalStrides[idx])._pvt_ptr[0] + cyglobalStrides = globalStridesStatic + else: + raise ValueError("Argument 'globalStrides' too long, must be <= 5") + pixelBoxLowerCorner = [] if pixelBoxLowerCorner is None else pixelBoxLowerCorner + if not all(isinstance(_x, (int)) for _x in pixelBoxLowerCorner): + raise TypeError("Argument 'pixelBoxLowerCorner' is not instance of type (expected tuple[int] or list[int]") + cypixelBoxLowerCorner = pixelBoxLowerCorner + pixelBoxUpperCorner = [] if pixelBoxUpperCorner is None else pixelBoxUpperCorner + if not all(isinstance(_x, (int)) for _x in pixelBoxUpperCorner): + raise TypeError("Argument 'pixelBoxUpperCorner' is not instance of type (expected tuple[int] or list[int]") + cypixelBoxUpperCorner = pixelBoxUpperCorner + if channelsPerPixel is None: + pchannelsPerPixel = 0 + elif isinstance(channelsPerPixel, (cuuint32_t,)): + pchannelsPerPixel = int(channelsPerPixel) + else: + pchannelsPerPixel = int(cuuint32_t(channelsPerPixel)) + cychannelsPerPixel = pchannelsPerPixel + if pixelsPerColumn is None: + ppixelsPerColumn = 0 + elif isinstance(pixelsPerColumn, (cuuint32_t,)): + ppixelsPerColumn = int(pixelsPerColumn) + else: + ppixelsPerColumn = int(cuuint32_t(pixelsPerColumn)) + cypixelsPerColumn = ppixelsPerColumn + elementStridesLen = 0 if elementStrides is None else len(elementStrides) + if elementStridesLen == 0: + cyelementStrides = NULL + elif elementStridesLen == 1: + cyelementStrides = ( elementStrides[0])._pvt_ptr + elif elementStridesLen <= 5: + for idx in range(elementStridesLen): + elementStridesStatic[idx] = ( elementStrides[idx])._pvt_ptr[0] + cyelementStrides = elementStridesStatic + else: + raise ValueError("Argument 'elementStrides' too long, must be <= 5") + cyinterleave = int(interleave) + cyswizzle = int(swizzle) + cyl2Promotion = int(l2Promotion) + cyoobFill = int(oobFill) + with nogil: + err = cydriver.cuTensorMapEncodeIm2col(tensorMap._pvt_ptr, cytensorDataType, cytensorRank, cyglobalAddress, cyglobalDim, cyglobalStrides, cypixelBoxLowerCorner.data(), cypixelBoxUpperCorner.data(), cychannelsPerPixel, cypixelsPerColumn, cyelementStrides, cyinterleave, cyswizzle, cyl2Promotion, cyoobFill) + finally: + _helper_input_void_ptr_free(&cyglobalAddressHelper) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, tensorMap) @@ -52149,84 +52634,94 @@ def cuTensorMapEncodeIm2colWide(tensorDataType not None : CUtensorMapDataType, t -------- :py:obj:`~.cuTensorMapEncodeTiled`, :py:obj:`~.cuTensorMapEncodeIm2col`, :py:obj:`~.cuTensorMapReplaceAddress` """ + cdef cydriver.CUtensorMapFloatOOBfill cyoobFill + cdef cydriver.CUtensorMapL2promotion cyl2Promotion + cdef cydriver.CUtensorMapSwizzle cyswizzle + cdef cydriver.CUtensorMapIm2ColWideMode cymode + cdef cydriver.CUtensorMapInterleave cyinterleave cdef cydriver.cuuint32_t* cyelementStrides cdef size_t elementStridesLen cdef cydriver.cuuint32_t[5] elementStridesStatic - elementStridesLen = 0 if elementStrides is None else len(elementStrides) - if elementStridesLen == 0: - cyelementStrides = NULL - elif elementStridesLen == 1: - cyelementStrides = ( elementStrides[0])._pvt_ptr - elif elementStridesLen <= 5: - for idx in range(elementStridesLen): - elementStridesStatic[idx] = ( elementStrides[idx])._pvt_ptr[0] - cyelementStrides = elementStridesStatic - else: - raise ValueError("Argument 'elementStrides' too long, must be <= 5") cdef cydriver.cuuint32_t cypixelsPerColumn - if pixelsPerColumn is None: - ppixelsPerColumn = 0 - elif isinstance(pixelsPerColumn, (cuuint32_t,)): - ppixelsPerColumn = int(pixelsPerColumn) - else: - ppixelsPerColumn = int(cuuint32_t(pixelsPerColumn)) - cypixelsPerColumn = ppixelsPerColumn cdef cydriver.cuuint32_t cychannelsPerPixel - if channelsPerPixel is None: - pchannelsPerPixel = 0 - elif isinstance(channelsPerPixel, (cuuint32_t,)): - pchannelsPerPixel = int(channelsPerPixel) - else: - pchannelsPerPixel = int(cuuint32_t(channelsPerPixel)) - cychannelsPerPixel = pchannelsPerPixel cdef cydriver.cuuint64_t* cyglobalStrides cdef size_t globalStridesLen cdef cydriver.cuuint64_t[5] globalStridesStatic - globalStridesLen = 0 if globalStrides is None else len(globalStrides) - if globalStridesLen == 0: - cyglobalStrides = NULL - elif globalStridesLen == 1: - cyglobalStrides = ( globalStrides[0])._pvt_ptr - elif globalStridesLen <= 5: - for idx in range(globalStridesLen): - globalStridesStatic[idx] = ( globalStrides[idx])._pvt_ptr[0] - cyglobalStrides = globalStridesStatic - else: - raise ValueError("Argument 'globalStrides' too long, must be <= 5") cdef cydriver.cuuint64_t* cyglobalDim cdef size_t globalDimLen cdef cydriver.cuuint64_t[5] globalDimStatic - globalDimLen = 0 if globalDim is None else len(globalDim) - if globalDimLen == 0: - cyglobalDim = NULL - elif globalDimLen == 1: - cyglobalDim = ( globalDim[0])._pvt_ptr - elif globalDimLen <= 5: - for idx in range(globalDimLen): - globalDimStatic[idx] = ( globalDim[idx])._pvt_ptr[0] - cyglobalDim = globalDimStatic - else: - raise ValueError("Argument 'globalDim' too long, must be <= 5") - cdef cydriver.cuuint32_t cytensorRank - if tensorRank is None: - ptensorRank = 0 - elif isinstance(tensorRank, (cuuint32_t,)): - ptensorRank = int(tensorRank) - else: - ptensorRank = int(cuuint32_t(tensorRank)) - cytensorRank = ptensorRank - cdef CUtensorMap tensorMap = CUtensorMap() - cdef cydriver.CUtensorMapDataType cytensorDataType = int(tensorDataType) cdef _HelperInputVoidPtrStruct cyglobalAddressHelper - cdef void* cyglobalAddress = _helper_input_void_ptr(globalAddress, &cyglobalAddressHelper) - cdef cydriver.CUtensorMapInterleave cyinterleave = int(interleave) - cdef cydriver.CUtensorMapIm2ColWideMode cymode = int(mode) - cdef cydriver.CUtensorMapSwizzle cyswizzle = int(swizzle) - cdef cydriver.CUtensorMapL2promotion cyl2Promotion = int(l2Promotion) - cdef cydriver.CUtensorMapFloatOOBfill cyoobFill = int(oobFill) - with nogil: - err = cydriver.cuTensorMapEncodeIm2colWide(tensorMap._pvt_ptr, cytensorDataType, cytensorRank, cyglobalAddress, cyglobalDim, cyglobalStrides, pixelBoxLowerCornerWidth, pixelBoxUpperCornerWidth, cychannelsPerPixel, cypixelsPerColumn, cyelementStrides, cyinterleave, cymode, cyswizzle, cyl2Promotion, cyoobFill) - _helper_input_void_ptr_free(&cyglobalAddressHelper) + cdef void* cyglobalAddress + cdef cydriver.cuuint32_t cytensorRank + cdef cydriver.CUtensorMapDataType cytensorDataType + cdef CUtensorMap tensorMap + try: + tensorMap = CUtensorMap() + cytensorDataType = int(tensorDataType) + if tensorRank is None: + ptensorRank = 0 + elif isinstance(tensorRank, (cuuint32_t,)): + ptensorRank = int(tensorRank) + else: + ptensorRank = int(cuuint32_t(tensorRank)) + cytensorRank = ptensorRank + cyglobalAddress = _helper_input_void_ptr(globalAddress, &cyglobalAddressHelper) + globalDimLen = 0 if globalDim is None else len(globalDim) + if globalDimLen == 0: + cyglobalDim = NULL + elif globalDimLen == 1: + cyglobalDim = ( globalDim[0])._pvt_ptr + elif globalDimLen <= 5: + for idx in range(globalDimLen): + globalDimStatic[idx] = ( globalDim[idx])._pvt_ptr[0] + cyglobalDim = globalDimStatic + else: + raise ValueError("Argument 'globalDim' too long, must be <= 5") + globalStridesLen = 0 if globalStrides is None else len(globalStrides) + if globalStridesLen == 0: + cyglobalStrides = NULL + elif globalStridesLen == 1: + cyglobalStrides = ( globalStrides[0])._pvt_ptr + elif globalStridesLen <= 5: + for idx in range(globalStridesLen): + globalStridesStatic[idx] = ( globalStrides[idx])._pvt_ptr[0] + cyglobalStrides = globalStridesStatic + else: + raise ValueError("Argument 'globalStrides' too long, must be <= 5") + if channelsPerPixel is None: + pchannelsPerPixel = 0 + elif isinstance(channelsPerPixel, (cuuint32_t,)): + pchannelsPerPixel = int(channelsPerPixel) + else: + pchannelsPerPixel = int(cuuint32_t(channelsPerPixel)) + cychannelsPerPixel = pchannelsPerPixel + if pixelsPerColumn is None: + ppixelsPerColumn = 0 + elif isinstance(pixelsPerColumn, (cuuint32_t,)): + ppixelsPerColumn = int(pixelsPerColumn) + else: + ppixelsPerColumn = int(cuuint32_t(pixelsPerColumn)) + cypixelsPerColumn = ppixelsPerColumn + elementStridesLen = 0 if elementStrides is None else len(elementStrides) + if elementStridesLen == 0: + cyelementStrides = NULL + elif elementStridesLen == 1: + cyelementStrides = ( elementStrides[0])._pvt_ptr + elif elementStridesLen <= 5: + for idx in range(elementStridesLen): + elementStridesStatic[idx] = ( elementStrides[idx])._pvt_ptr[0] + cyelementStrides = elementStridesStatic + else: + raise ValueError("Argument 'elementStrides' too long, must be <= 5") + cyinterleave = int(interleave) + cymode = int(mode) + cyswizzle = int(swizzle) + cyl2Promotion = int(l2Promotion) + cyoobFill = int(oobFill) + with nogil: + err = cydriver.cuTensorMapEncodeIm2colWide(tensorMap._pvt_ptr, cytensorDataType, cytensorRank, cyglobalAddress, cyglobalDim, cyglobalStrides, pixelBoxLowerCornerWidth, pixelBoxUpperCornerWidth, cychannelsPerPixel, cypixelsPerColumn, cyelementStrides, cyinterleave, cymode, cyswizzle, cyl2Promotion, cyoobFill) + finally: + _helper_input_void_ptr_free(&cyglobalAddressHelper) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, tensorMap) @@ -52262,12 +52757,16 @@ def cuTensorMapReplaceAddress(tensorMap : Optional[CUtensorMap], globalAddress): -------- :py:obj:`~.cuTensorMapEncodeTiled`, :py:obj:`~.cuTensorMapEncodeIm2col`, :py:obj:`~.cuTensorMapEncodeIm2colWide` """ - cdef cydriver.CUtensorMap* cytensorMap_ptr = tensorMap._pvt_ptr if tensorMap is not None else NULL cdef _HelperInputVoidPtrStruct cyglobalAddressHelper - cdef void* cyglobalAddress = _helper_input_void_ptr(globalAddress, &cyglobalAddressHelper) - with nogil: - err = cydriver.cuTensorMapReplaceAddress(cytensorMap_ptr, cyglobalAddress) - _helper_input_void_ptr_free(&cyglobalAddressHelper) + cdef void* cyglobalAddress + cdef cydriver.CUtensorMap* cytensorMap_ptr + try: + cytensorMap_ptr = tensorMap._pvt_ptr if tensorMap is not None else NULL + cyglobalAddress = _helper_input_void_ptr(globalAddress, &cyglobalAddressHelper) + with nogil: + err = cydriver.cuTensorMapReplaceAddress(cytensorMap_ptr, cyglobalAddress) + finally: + _helper_input_void_ptr_free(&cyglobalAddressHelper) return (_CUresult(err),) {{endif}} @@ -52304,14 +52803,8 @@ def cuDeviceCanAccessPeer(dev, peerDev): :py:obj:`~.cuCtxEnablePeerAccess`, :py:obj:`~.cuCtxDisablePeerAccess`, :py:obj:`~.cudaDeviceCanAccessPeer` """ cdef cydriver.CUdevice cypeerDev - if peerDev is None: - ppeerDev = 0 - elif isinstance(peerDev, (CUdevice,)): - ppeerDev = int(peerDev) - else: - ppeerDev = int(CUdevice(peerDev)) - cypeerDev = ppeerDev cdef cydriver.CUdevice cydev + cdef int canAccessPeer = 0 if dev is None: pdev = 0 elif isinstance(dev, (CUdevice,)): @@ -52319,7 +52812,13 @@ def cuDeviceCanAccessPeer(dev, peerDev): else: pdev = int(CUdevice(dev)) cydev = pdev - cdef int canAccessPeer = 0 + if peerDev is None: + ppeerDev = 0 + elif isinstance(peerDev, (CUdevice,)): + ppeerDev = int(peerDev) + else: + ppeerDev = int(CUdevice(peerDev)) + cypeerDev = ppeerDev with nogil: err = cydriver.cuDeviceCanAccessPeer(&canAccessPeer, cydev, cypeerDev) if err != cydriver.CUDA_SUCCESS: @@ -52492,14 +52991,10 @@ def cuDeviceGetP2PAttribute(attrib not None : CUdevice_P2PAttribute, srcDevice, :py:obj:`~.cuCtxEnablePeerAccess`, :py:obj:`~.cuCtxDisablePeerAccess`, :py:obj:`~.cuDeviceCanAccessPeer`, :py:obj:`~.cuDeviceGetP2PAtomicCapabilities`, :py:obj:`~.cudaDeviceGetP2PAttribute` """ cdef cydriver.CUdevice cydstDevice - if dstDevice is None: - pdstDevice = 0 - elif isinstance(dstDevice, (CUdevice,)): - pdstDevice = int(dstDevice) - else: - pdstDevice = int(CUdevice(dstDevice)) - cydstDevice = pdstDevice cdef cydriver.CUdevice cysrcDevice + cdef cydriver.CUdevice_P2PAttribute cyattrib + cdef int value = 0 + cyattrib = int(attrib) if srcDevice is None: psrcDevice = 0 elif isinstance(srcDevice, (CUdevice,)): @@ -52507,8 +53002,13 @@ def cuDeviceGetP2PAttribute(attrib not None : CUdevice_P2PAttribute, srcDevice, else: psrcDevice = int(CUdevice(srcDevice)) cysrcDevice = psrcDevice - cdef int value = 0 - cdef cydriver.CUdevice_P2PAttribute cyattrib = int(attrib) + if dstDevice is None: + pdstDevice = 0 + elif isinstance(dstDevice, (CUdevice,)): + pdstDevice = int(dstDevice) + else: + pdstDevice = int(CUdevice(dstDevice)) + cydstDevice = pdstDevice with nogil: err = cydriver.cuDeviceGetP2PAttribute(&value, cyattrib, cysrcDevice, cydstDevice) if err != cydriver.CUDA_SUCCESS: @@ -52562,38 +53062,41 @@ def cuDeviceGetP2PAtomicCapabilities(operations : Optional[tuple[CUatomicOperati :py:obj:`~.cuDeviceGetP2PAttribute`, :py:obj:`~.cudaDeviceGetP2PAttribute`, :py:obj:`~.cudaDeviceGetP2PAtomicCapabilities` """ cdef cydriver.CUdevice cydstDevice - if dstDevice is None: - pdstDevice = 0 - elif isinstance(dstDevice, (CUdevice,)): - pdstDevice = int(dstDevice) - else: - pdstDevice = int(CUdevice(dstDevice)) - cydstDevice = pdstDevice cdef cydriver.CUdevice cysrcDevice - if srcDevice is None: - psrcDevice = 0 - elif isinstance(srcDevice, (CUdevice,)): - psrcDevice = int(srcDevice) - else: - psrcDevice = int(CUdevice(srcDevice)) - cysrcDevice = psrcDevice - operations = [] if operations is None else operations - if not all(isinstance(_x, (CUatomicOperation)) for _x in operations): - raise TypeError("Argument 'operations' is not instance of type (expected tuple[cydriver.CUatomicOperation] or list[cydriver.CUatomicOperation]") + cdef vector[cydriver.CUatomicOperation] cyoperations cdef unsigned int* cycapabilities = NULL pycapabilities = [] - if count != 0: - cycapabilities = calloc(count, sizeof(unsigned int)) - if cycapabilities is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(count) + 'x' + str(sizeof(unsigned int))) - cdef vector[cydriver.CUatomicOperation] cyoperations = [int(pyoperations) for pyoperations in (operations)] - if count > len(operations): raise RuntimeError("List is too small: " + str(len(operations)) + " < " + str(count)) - with nogil: - err = cydriver.cuDeviceGetP2PAtomicCapabilities(cycapabilities, cyoperations.data(), count, cysrcDevice, cydstDevice) - if CUresult(err) == CUresult(0): - pycapabilities = [cycapabilities[idx] for idx in range(count)] - if cycapabilities is not NULL: - free(cycapabilities) + try: + if count != 0: + cycapabilities = calloc(count, sizeof(unsigned int)) + if cycapabilities is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(count) + 'x' + str(sizeof(unsigned int))) + operations = [] if operations is None else operations + if not all(isinstance(_x, (CUatomicOperation)) for _x in operations): + raise TypeError("Argument 'operations' is not instance of type (expected tuple[cydriver.CUatomicOperation] or list[cydriver.CUatomicOperation]") + cyoperations = operations + if count > len(operations): raise RuntimeError("List is too small: " + str(len(operations)) + " < " + str(count)) + if srcDevice is None: + psrcDevice = 0 + elif isinstance(srcDevice, (CUdevice,)): + psrcDevice = int(srcDevice) + else: + psrcDevice = int(CUdevice(srcDevice)) + cysrcDevice = psrcDevice + if dstDevice is None: + pdstDevice = 0 + elif isinstance(dstDevice, (CUdevice,)): + pdstDevice = int(dstDevice) + else: + pdstDevice = int(CUdevice(dstDevice)) + cydstDevice = pdstDevice + with nogil: + err = cydriver.cuDeviceGetP2PAtomicCapabilities(cycapabilities, cyoperations.data(), count, cysrcDevice, cydstDevice) + finally: + if CUresult(err) == CUresult(0): + pycapabilities = [cycapabilities[idx] for idx in range(count)] + if cycapabilities is not NULL: + free(cycapabilities) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, pycapabilities) @@ -52681,6 +53184,8 @@ def cuGraphicsSubResourceGetMappedArray(resource, unsigned int arrayIndex, unsig :py:obj:`~.cuGraphicsResourceGetMappedPointer`, :py:obj:`~.cudaGraphicsSubResourceGetMappedArray` """ cdef cydriver.CUgraphicsResource cyresource + cdef CUarray pArray + pArray = CUarray() if resource is None: presource = 0 elif isinstance(resource, (CUgraphicsResource,)): @@ -52688,7 +53193,6 @@ def cuGraphicsSubResourceGetMappedArray(resource, unsigned int arrayIndex, unsig else: presource = int(CUgraphicsResource(resource)) cyresource = presource - cdef CUarray pArray = CUarray() with nogil: err = cydriver.cuGraphicsSubResourceGetMappedArray(pArray._pvt_ptr, cyresource, arrayIndex, mipLevel) if err != cydriver.CUDA_SUCCESS: @@ -52728,6 +53232,8 @@ def cuGraphicsResourceGetMappedMipmappedArray(resource): :py:obj:`~.cuGraphicsResourceGetMappedPointer`, :py:obj:`~.cudaGraphicsResourceGetMappedMipmappedArray` """ cdef cydriver.CUgraphicsResource cyresource + cdef CUmipmappedArray pMipmappedArray + pMipmappedArray = CUmipmappedArray() if resource is None: presource = 0 elif isinstance(resource, (CUgraphicsResource,)): @@ -52735,7 +53241,6 @@ def cuGraphicsResourceGetMappedMipmappedArray(resource): else: presource = int(CUgraphicsResource(resource)) cyresource = presource - cdef CUmipmappedArray pMipmappedArray = CUmipmappedArray() with nogil: err = cydriver.cuGraphicsResourceGetMappedMipmappedArray(pMipmappedArray._pvt_ptr, cyresource) if err != cydriver.CUDA_SUCCESS: @@ -52774,6 +53279,9 @@ def cuGraphicsResourceGetMappedPointer(resource): None """ cdef cydriver.CUgraphicsResource cyresource + cdef size_t pSize = 0 + cdef CUdeviceptr pDevPtr + pDevPtr = CUdeviceptr() if resource is None: presource = 0 elif isinstance(resource, (CUgraphicsResource,)): @@ -52781,8 +53289,6 @@ def cuGraphicsResourceGetMappedPointer(resource): else: presource = int(CUgraphicsResource(resource)) cyresource = presource - cdef CUdeviceptr pDevPtr = CUdeviceptr() - cdef size_t pSize = 0 with nogil: err = cydriver.cuGraphicsResourceGetMappedPointer(pDevPtr._pvt_ptr, &pSize, cyresource) if err != cydriver.CUDA_SUCCESS: @@ -52890,13 +53396,6 @@ def cuGraphicsMapResources(unsigned int count, resources, hStream): :py:obj:`~.cuGraphicsResourceGetMappedPointer`, :py:obj:`~.cuGraphicsSubResourceGetMappedArray`, :py:obj:`~.cuGraphicsUnmapResources`, :py:obj:`~.cudaGraphicsMapResources` """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream cdef cydriver.CUgraphicsResource *cyresources if resources is None: cyresources = NULL @@ -52907,6 +53406,13 @@ def cuGraphicsMapResources(unsigned int count, resources, hStream): cyresources = resources else: raise TypeError("Argument 'resources' is not instance of type (expected , found " + str(type(resources))) + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream with nogil: err = cydriver.cuGraphicsMapResources(count, cyresources, cyhStream) return (_CUresult(err),) @@ -52951,13 +53457,6 @@ def cuGraphicsUnmapResources(unsigned int count, resources, hStream): :py:obj:`~.cuGraphicsMapResources`, :py:obj:`~.cudaGraphicsUnmapResources` """ cdef cydriver.CUstream cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (CUstream,)): - phStream = int(hStream) - else: - phStream = int(CUstream(hStream)) - cyhStream = phStream cdef cydriver.CUgraphicsResource *cyresources if resources is None: cyresources = NULL @@ -52968,6 +53467,13 @@ def cuGraphicsUnmapResources(unsigned int count, resources, hStream): cyresources = resources else: raise TypeError("Argument 'resources' is not instance of type (expected , found " + str(type(resources))) + if hStream is None: + phStream = 0 + elif isinstance(hStream, (CUstream,)): + phStream = int(hStream) + else: + phStream = int(CUstream(hStream)) + cyhStream = phStream with nogil: err = cydriver.cuGraphicsUnmapResources(count, cyresources, cyhStream) return (_CUresult(err),) @@ -53062,7 +53568,9 @@ def cuGetProcAddress(char* symbol, int cudaVersion, flags): -------- :py:obj:`~.cudaGetDriverEntryPointByVersion` """ + cdef cydriver.CUdriverProcAddressQueryResult symbolStatus cdef cydriver.cuuint64_t cyflags + cdef void_ptr pfn = 0 if flags is None: pflags = 0 elif isinstance(flags, (cuuint64_t,)): @@ -53070,8 +53578,6 @@ def cuGetProcAddress(char* symbol, int cudaVersion, flags): else: pflags = int(cuuint64_t(flags)) cyflags = pflags - cdef void_ptr pfn = 0 - cdef cydriver.CUdriverProcAddressQueryResult symbolStatus with nogil: err = cydriver.cuGetProcAddress(symbol, &pfn, cudaVersion, cyflags, &symbolStatus) if err != cydriver.CUDA_SUCCESS: @@ -53188,10 +53694,14 @@ def cuCoredumpGetAttribute(attrib not None : CUcoredumpSettings): -------- :py:obj:`~.cuCoredumpGetAttributeGlobal`, :py:obj:`~.cuCoredumpSetAttribute`, :py:obj:`~.cuCoredumpSetAttributeGlobal` """ - cdef cydriver.CUcoredumpSettings cyattrib = int(attrib) - cdef _HelperCUcoredumpSettings cyvalue = _HelperCUcoredumpSettings(attrib, 0, is_getter=True) - cdef void* cyvalue_ptr = cyvalue.cptr - cdef size_t size = cyvalue.size() + cdef size_t size + cdef _HelperCUcoredumpSettings cyvalue + cdef void* cyvalue_ptr + cdef cydriver.CUcoredumpSettings cyattrib + cyattrib = int(attrib) + cyvalue = _HelperCUcoredumpSettings(attrib, 0, is_getter=True) + cyvalue_ptr = cyvalue.cptr + size = cyvalue.size() with nogil: err = cydriver.cuCoredumpGetAttribute(cyattrib, cyvalue_ptr, &size) if err != cydriver.CUDA_SUCCESS: @@ -53304,10 +53814,14 @@ def cuCoredumpGetAttributeGlobal(attrib not None : CUcoredumpSettings): -------- :py:obj:`~.cuCoredumpGetAttribute`, :py:obj:`~.cuCoredumpSetAttribute`, :py:obj:`~.cuCoredumpSetAttributeGlobal` """ - cdef cydriver.CUcoredumpSettings cyattrib = int(attrib) - cdef _HelperCUcoredumpSettings cyvalue = _HelperCUcoredumpSettings(attrib, 0, is_getter=True) - cdef void* cyvalue_ptr = cyvalue.cptr - cdef size_t size = cyvalue.size() + cdef size_t size + cdef _HelperCUcoredumpSettings cyvalue + cdef void* cyvalue_ptr + cdef cydriver.CUcoredumpSettings cyattrib + cyattrib = int(attrib) + cyvalue = _HelperCUcoredumpSettings(attrib, 0, is_getter=True) + cyvalue_ptr = cyvalue.cptr + size = cyvalue.size() with nogil: err = cydriver.cuCoredumpGetAttributeGlobal(cyattrib, cyvalue_ptr, &size) if err != cydriver.CUDA_SUCCESS: @@ -53427,10 +53941,14 @@ def cuCoredumpSetAttribute(attrib not None : CUcoredumpSettings, value): -------- :py:obj:`~.cuCoredumpGetAttributeGlobal`, :py:obj:`~.cuCoredumpGetAttribute`, :py:obj:`~.cuCoredumpSetAttributeGlobal` """ - cdef cydriver.CUcoredumpSettings cyattrib = int(attrib) - cdef _HelperCUcoredumpSettings cyvalue = _HelperCUcoredumpSettings(attrib, value, is_getter=False) - cdef void* cyvalue_ptr = cyvalue.cptr - cdef size_t size = cyvalue.size() + cdef size_t size + cdef _HelperCUcoredumpSettings cyvalue + cdef void* cyvalue_ptr + cdef cydriver.CUcoredumpSettings cyattrib + cyattrib = int(attrib) + cyvalue = _HelperCUcoredumpSettings(attrib, value, is_getter=False) + cyvalue_ptr = cyvalue.cptr + size = cyvalue.size() with nogil: err = cydriver.cuCoredumpSetAttribute(cyattrib, cyvalue_ptr, &size) return (_CUresult(err),) @@ -53553,10 +54071,14 @@ def cuCoredumpSetAttributeGlobal(attrib not None : CUcoredumpSettings, value): -------- :py:obj:`~.cuCoredumpGetAttribute`, :py:obj:`~.cuCoredumpGetAttributeGlobal`, :py:obj:`~.cuCoredumpSetAttribute` """ - cdef cydriver.CUcoredumpSettings cyattrib = int(attrib) - cdef _HelperCUcoredumpSettings cyvalue = _HelperCUcoredumpSettings(attrib, value, is_getter=False) - cdef void* cyvalue_ptr = cyvalue.cptr - cdef size_t size = cyvalue.size() + cdef size_t size + cdef _HelperCUcoredumpSettings cyvalue + cdef void* cyvalue_ptr + cdef cydriver.CUcoredumpSettings cyattrib + cyattrib = int(attrib) + cyvalue = _HelperCUcoredumpSettings(attrib, value, is_getter=False) + cyvalue_ptr = cyvalue.cptr + size = cyvalue.size() with nogil: err = cydriver.cuCoredumpSetAttributeGlobal(cyattrib, cyvalue_ptr, &size) return (_CUresult(err),) @@ -53580,8 +54102,9 @@ def cuGetExportTable(pExportTableId : Optional[CUuuid]): ppExportTable : Any None """ + cdef cydriver.CUuuid* cypExportTableId_ptr cdef void_ptr ppExportTable = 0 - cdef cydriver.CUuuid* cypExportTableId_ptr = pExportTableId._pvt_ptr if pExportTableId is not None else NULL + cypExportTableId_ptr = pExportTableId._pvt_ptr if pExportTableId is not None else NULL with nogil: err = cydriver.cuGetExportTable(&ppExportTable, cypExportTableId_ptr) if err != cydriver.CUDA_SUCCESS: @@ -53642,14 +54165,9 @@ def cuGreenCtxCreate(desc, dev, unsigned int flags): :py:obj:`~.cuGreenCtxDestroy`, :py:obj:`~.cuCtxFromGreenCtx`, :py:obj:`~.cuCtxSetCurrent`, :py:obj:`~.cuCtxPushCurrent`, :py:obj:`~.cuDevResourceGenerateDesc`, :py:obj:`~.cuDevicePrimaryCtxRetain`, :py:obj:`~.cuCtxCreate` """ cdef cydriver.CUdevice cydev - if dev is None: - pdev = 0 - elif isinstance(dev, (CUdevice,)): - pdev = int(dev) - else: - pdev = int(CUdevice(dev)) - cydev = pdev cdef cydriver.CUdevResourceDesc cydesc + cdef CUgreenCtx phCtx + phCtx = CUgreenCtx() if desc is None: pdesc = 0 elif isinstance(desc, (CUdevResourceDesc,)): @@ -53657,7 +54175,13 @@ def cuGreenCtxCreate(desc, dev, unsigned int flags): else: pdesc = int(CUdevResourceDesc(desc)) cydesc = pdesc - cdef CUgreenCtx phCtx = CUgreenCtx() + if dev is None: + pdev = 0 + elif isinstance(dev, (CUdevice,)): + pdev = int(dev) + else: + pdev = int(CUdevice(dev)) + cydev = pdev with nogil: err = cydriver.cuGreenCtxCreate(phCtx._pvt_ptr, cydesc, cydev, flags) if err != cydriver.CUDA_SUCCESS: @@ -53749,6 +54273,8 @@ def cuCtxFromGreenCtx(hCtx): :py:obj:`~.cuGreenCtxCreate` """ cdef cydriver.CUgreenCtx cyhCtx + cdef CUcontext pContext + pContext = CUcontext() if hCtx is None: phCtx = 0 elif isinstance(hCtx, (CUgreenCtx,)): @@ -53756,7 +54282,6 @@ def cuCtxFromGreenCtx(hCtx): else: phCtx = int(CUgreenCtx(hCtx)) cyhCtx = phCtx - cdef CUcontext pContext = CUcontext() with nogil: err = cydriver.cuCtxFromGreenCtx(pContext._pvt_ptr, cyhCtx) if err != cydriver.CUDA_SUCCESS: @@ -53794,6 +54319,8 @@ def cuDeviceGetDevResource(device, typename not None : CUdevResourceType): -------- :py:obj:`~.cuDevResourceGenerateDesc` """ + cdef cydriver.CUdevResourceType cytypename + cdef CUdevResource resource cdef cydriver.CUdevice cydevice if device is None: pdevice = 0 @@ -53802,8 +54329,8 @@ def cuDeviceGetDevResource(device, typename not None : CUdevResourceType): else: pdevice = int(CUdevice(device)) cydevice = pdevice - cdef CUdevResource resource = CUdevResource() - cdef cydriver.CUdevResourceType cytypename = int(typename) + resource = CUdevResource() + cytypename = int(typename) with nogil: err = cydriver.cuDeviceGetDevResource(cydevice, resource._pvt_ptr, cytypename) if err != cydriver.CUDA_SUCCESS: @@ -53838,6 +54365,8 @@ def cuCtxGetDevResource(hCtx, typename not None : CUdevResourceType): -------- :py:obj:`~.cuDevResourceGenerateDesc` """ + cdef cydriver.CUdevResourceType cytypename + cdef CUdevResource resource cdef cydriver.CUcontext cyhCtx if hCtx is None: phCtx = 0 @@ -53846,8 +54375,8 @@ def cuCtxGetDevResource(hCtx, typename not None : CUdevResourceType): else: phCtx = int(CUcontext(hCtx)) cyhCtx = phCtx - cdef CUdevResource resource = CUdevResource() - cdef cydriver.CUdevResourceType cytypename = int(typename) + resource = CUdevResource() + cytypename = int(typename) with nogil: err = cydriver.cuCtxGetDevResource(cyhCtx, resource._pvt_ptr, cytypename) if err != cydriver.CUDA_SUCCESS: @@ -53882,6 +54411,8 @@ def cuGreenCtxGetDevResource(hCtx, typename not None : CUdevResourceType): -------- :py:obj:`~.cuDevResourceGenerateDesc` """ + cdef cydriver.CUdevResourceType cytypename + cdef CUdevResource resource cdef cydriver.CUgreenCtx cyhCtx if hCtx is None: phCtx = 0 @@ -53890,8 +54421,8 @@ def cuGreenCtxGetDevResource(hCtx, typename not None : CUdevResourceType): else: phCtx = int(CUgreenCtx(hCtx)) cyhCtx = phCtx - cdef CUdevResource resource = CUdevResource() - cdef cydriver.CUdevResourceType cytypename = int(typename) + resource = CUdevResource() + cytypename = int(typename) with nogil: err = cydriver.cuGreenCtxGetDevResource(cyhCtx, resource._pvt_ptr, cytypename) if err != cydriver.CUDA_SUCCESS: @@ -54002,22 +54533,27 @@ def cuDevSmResourceSplitByCount(unsigned int nbGroups, input_ : Optional[CUdevRe -------- :py:obj:`~.cuGreenCtxGetDevResource`, :py:obj:`~.cuCtxGetDevResource`, :py:obj:`~.cuDeviceGetDevResource` """ + cdef CUdevResource remainder + cdef cydriver.CUdevResource* cyinput__ptr + cdef unsigned int cynbGroups cdef cydriver.CUdevResource* cyresult = NULL pyresult = [CUdevResource() for idx in range(nbGroups)] - if nbGroups != 0: - cyresult = calloc(nbGroups, sizeof(cydriver.CUdevResource)) - if cyresult is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(nbGroups) + 'x' + str(sizeof(cydriver.CUdevResource))) - cdef unsigned int cynbGroups = nbGroups - cdef cydriver.CUdevResource* cyinput__ptr = input_._pvt_ptr if input_ is not None else NULL - cdef CUdevResource remainder = CUdevResource() - with nogil: - err = cydriver.cuDevSmResourceSplitByCount(cyresult, &cynbGroups, cyinput__ptr, remainder._pvt_ptr, flags, minCount) - if CUresult(err) == CUresult(0): - for idx in range(nbGroups): - string.memcpy((pyresult[idx])._pvt_ptr, &cyresult[idx], sizeof(cydriver.CUdevResource)) - if cyresult is not NULL: - free(cyresult) + try: + if nbGroups != 0: + cyresult = calloc(nbGroups, sizeof(cydriver.CUdevResource)) + if cyresult is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(nbGroups) + 'x' + str(sizeof(cydriver.CUdevResource))) + cynbGroups = nbGroups + cyinput__ptr = input_._pvt_ptr if input_ is not None else NULL + remainder = CUdevResource() + with nogil: + err = cydriver.cuDevSmResourceSplitByCount(cyresult, &cynbGroups, cyinput__ptr, remainder._pvt_ptr, flags, minCount) + finally: + if CUresult(err) == CUresult(0): + for idx in range(nbGroups): + string.memcpy((pyresult[idx])._pvt_ptr, &cyresult[idx], sizeof(cydriver.CUdevResource)) + if cyresult is not NULL: + free(cyresult) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None, None, None) return (_CUresult_SUCCESS, pyresult, cynbGroups, remainder) @@ -54163,22 +54699,27 @@ def cuDevSmResourceSplit(unsigned int nbGroups, input_ : Optional[CUdevResource] -------- :py:obj:`~.cuGreenCtxGetDevResource`, :py:obj:`~.cuCtxGetDevResource`, :py:obj:`~.cuDeviceGetDevResource` """ + cdef cydriver.CU_DEV_SM_RESOURCE_GROUP_PARAMS* cygroupParams_ptr + cdef CUdevResource remainder + cdef cydriver.CUdevResource* cyinput__ptr cdef cydriver.CUdevResource* cyresult = NULL pyresult = [CUdevResource() for idx in range(nbGroups)] - if nbGroups != 0: - cyresult = calloc(nbGroups, sizeof(cydriver.CUdevResource)) - if cyresult is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(nbGroups) + 'x' + str(sizeof(cydriver.CUdevResource))) - cdef cydriver.CUdevResource* cyinput__ptr = input_._pvt_ptr if input_ is not None else NULL - cdef CUdevResource remainder = CUdevResource() - cdef cydriver.CU_DEV_SM_RESOURCE_GROUP_PARAMS* cygroupParams_ptr = groupParams._pvt_ptr if groupParams is not None else NULL - with nogil: - err = cydriver.cuDevSmResourceSplit(cyresult, nbGroups, cyinput__ptr, remainder._pvt_ptr, flags, cygroupParams_ptr) - if CUresult(err) == CUresult(0): - for idx in range(nbGroups): - string.memcpy((pyresult[idx])._pvt_ptr, &cyresult[idx], sizeof(cydriver.CUdevResource)) - if cyresult is not NULL: - free(cyresult) + try: + if nbGroups != 0: + cyresult = calloc(nbGroups, sizeof(cydriver.CUdevResource)) + if cyresult is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(nbGroups) + 'x' + str(sizeof(cydriver.CUdevResource))) + cyinput__ptr = input_._pvt_ptr if input_ is not None else NULL + remainder = CUdevResource() + cygroupParams_ptr = groupParams._pvt_ptr if groupParams is not None else NULL + with nogil: + err = cydriver.cuDevSmResourceSplit(cyresult, nbGroups, cyinput__ptr, remainder._pvt_ptr, flags, cygroupParams_ptr) + finally: + if CUresult(err) == CUresult(0): + for idx in range(nbGroups): + string.memcpy((pyresult[idx])._pvt_ptr, &cyresult[idx], sizeof(cydriver.CUdevResource)) + if cyresult is not NULL: + free(cyresult) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None, None) return (_CUresult_SUCCESS, pyresult, remainder) @@ -54229,24 +54770,27 @@ def cuDevResourceGenerateDesc(resources : Optional[tuple[CUdevResource] | list[C -------- :py:obj:`~.cuDevSmResourceSplitByCount` """ - resources = [] if resources is None else resources - if not all(isinstance(_x, (CUdevResource,)) for _x in resources): - raise TypeError("Argument 'resources' is not instance of type (expected tuple[cydriver.CUdevResource,] or list[cydriver.CUdevResource,]") - cdef CUdevResourceDesc phDesc = CUdevResourceDesc() cdef cydriver.CUdevResource* cyresources = NULL - if len(resources) > 1: - cyresources = calloc(len(resources), sizeof(cydriver.CUdevResource)) - if cyresources is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(resources)) + 'x' + str(sizeof(cydriver.CUdevResource))) - for idx in range(len(resources)): - string.memcpy(&cyresources[idx], (resources[idx])._pvt_ptr, sizeof(cydriver.CUdevResource)) - elif len(resources) == 1: - cyresources = (resources[0])._pvt_ptr - if nbResources > len(resources): raise RuntimeError("List is too small: " + str(len(resources)) + " < " + str(nbResources)) - with nogil: - err = cydriver.cuDevResourceGenerateDesc(phDesc._pvt_ptr, cyresources, nbResources) - if len(resources) > 1 and cyresources is not NULL: - free(cyresources) + cdef CUdevResourceDesc phDesc + try: + phDesc = CUdevResourceDesc() + resources = [] if resources is None else resources + if not all(isinstance(_x, (CUdevResource,)) for _x in resources): + raise TypeError("Argument 'resources' is not instance of type (expected tuple[cydriver.CUdevResource,] or list[cydriver.CUdevResource,]") + if len(resources) > 1: + cyresources = calloc(len(resources), sizeof(cydriver.CUdevResource)) + if cyresources is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(resources)) + 'x' + str(sizeof(cydriver.CUdevResource))) + for idx in range(len(resources)): + string.memcpy(&cyresources[idx], (resources[idx])._pvt_ptr, sizeof(cydriver.CUdevResource)) + elif len(resources) == 1: + cyresources = (resources[0])._pvt_ptr + if nbResources > len(resources): raise RuntimeError("List is too small: " + str(len(resources)) + " < " + str(nbResources)) + with nogil: + err = cydriver.cuDevResourceGenerateDesc(phDesc._pvt_ptr, cyresources, nbResources) + finally: + if len(resources) > 1 and cyresources is not NULL: + free(cyresources) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, phDesc) @@ -54287,13 +54831,6 @@ def cuGreenCtxRecordEvent(hCtx, hEvent): The API will return :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED` if the specified green context `hCtx` has a stream in the capture mode. In such a case, the call will invalidate all the conflicting captures. """ cdef cydriver.CUevent cyhEvent - if hEvent is None: - phEvent = 0 - elif isinstance(hEvent, (CUevent,)): - phEvent = int(hEvent) - else: - phEvent = int(CUevent(hEvent)) - cyhEvent = phEvent cdef cydriver.CUgreenCtx cyhCtx if hCtx is None: phCtx = 0 @@ -54302,6 +54839,13 @@ def cuGreenCtxRecordEvent(hCtx, hEvent): else: phCtx = int(CUgreenCtx(hCtx)) cyhCtx = phCtx + if hEvent is None: + phEvent = 0 + elif isinstance(hEvent, (CUevent,)): + phEvent = int(hEvent) + else: + phEvent = int(CUevent(hEvent)) + cyhEvent = phEvent with nogil: err = cydriver.cuGreenCtxRecordEvent(cyhCtx, cyhEvent) return (_CUresult(err),) @@ -54342,13 +54886,6 @@ def cuGreenCtxWaitEvent(hCtx, hEvent): The API will return :py:obj:`~.CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED` and invalidate the capture if the specified event `hEvent` is part of an ongoing capture sequence or if the specified green context `hCtx` has a stream in the capture mode. """ cdef cydriver.CUevent cyhEvent - if hEvent is None: - phEvent = 0 - elif isinstance(hEvent, (CUevent,)): - phEvent = int(hEvent) - else: - phEvent = int(CUevent(hEvent)) - cyhEvent = phEvent cdef cydriver.CUgreenCtx cyhCtx if hCtx is None: phCtx = 0 @@ -54357,6 +54894,13 @@ def cuGreenCtxWaitEvent(hCtx, hEvent): else: phCtx = int(CUgreenCtx(hCtx)) cyhCtx = phCtx + if hEvent is None: + phEvent = 0 + elif isinstance(hEvent, (CUevent,)): + phEvent = int(hEvent) + else: + phEvent = int(CUevent(hEvent)) + cyhEvent = phEvent with nogil: err = cydriver.cuGreenCtxWaitEvent(cyhCtx, cyhEvent) return (_CUresult(err),) @@ -54406,6 +54950,7 @@ def cuStreamGetGreenCtx(hStream): -------- :py:obj:`~.cuStreamDestroy`, :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuStreamCreateWithPriority`, :py:obj:`~.cuStreamGetCtx`, :py:obj:`~.cuGreenCtxStreamCreate`, :py:obj:`~.cuStreamGetPriority`, :py:obj:`~.cuStreamGetFlags`, :py:obj:`~.cuStreamGetDevice`, :py:obj:`~.cuStreamWaitEvent`, :py:obj:`~.cuStreamQuery`, :py:obj:`~.cuStreamSynchronize`, :py:obj:`~.cuStreamAddCallback`, :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags` """ + cdef CUgreenCtx phCtx cdef cydriver.CUstream cyhStream if hStream is None: phStream = 0 @@ -54414,7 +54959,7 @@ def cuStreamGetGreenCtx(hStream): else: phStream = int(CUstream(hStream)) cyhStream = phStream - cdef CUgreenCtx phCtx = CUgreenCtx() + phCtx = CUgreenCtx() with nogil: err = cydriver.cuStreamGetGreenCtx(cyhStream, phCtx._pvt_ptr) if err != cydriver.CUDA_SUCCESS: @@ -54482,6 +55027,8 @@ def cuGreenCtxStreamCreate(greenCtx, unsigned int flags, int priority): In the current implementation, only compute kernels launched in priority streams are affected by the stream's priority. Stream priorities have no effect on host-to-device and device-to-host memory operations. """ cdef cydriver.CUgreenCtx cygreenCtx + cdef CUstream phStream + phStream = CUstream() if greenCtx is None: pgreenCtx = 0 elif isinstance(greenCtx, (CUgreenCtx,)): @@ -54489,7 +55036,6 @@ def cuGreenCtxStreamCreate(greenCtx, unsigned int flags, int priority): else: pgreenCtx = int(CUgreenCtx(greenCtx)) cygreenCtx = pgreenCtx - cdef CUstream phStream = CUstream() with nogil: err = cydriver.cuGreenCtxStreamCreate(phStream._pvt_ptr, cygreenCtx, flags, priority) if err != cydriver.CUDA_SUCCESS: @@ -54525,6 +55071,7 @@ def cuGreenCtxGetId(greenCtx): -------- :py:obj:`~.cuGreenCtxCreate`, :py:obj:`~.cuGreenCtxDestroy`, :py:obj:`~.cuCtxGetId` """ + cdef unsigned long long greenCtxId = 0 cdef cydriver.CUgreenCtx cygreenCtx if greenCtx is None: pgreenCtx = 0 @@ -54533,7 +55080,6 @@ def cuGreenCtxGetId(greenCtx): else: pgreenCtx = int(CUgreenCtx(greenCtx)) cygreenCtx = pgreenCtx - cdef unsigned long long greenCtxId = 0 with nogil: err = cydriver.cuGreenCtxGetId(cygreenCtx, &greenCtxId) if err != cydriver.CUDA_SUCCESS: @@ -54572,6 +55118,8 @@ def cuStreamGetDevResource(hStream, typename not None : CUdevResourceType): -------- :py:obj:`~.cuGreenCtxCreate`, :py:obj:`~.cuGreenCtxStreamCreate`, :py:obj:`~.cuStreamCreate`, :py:obj:`~.cuDevSmResourceSplitByCount`, :py:obj:`~.cuDevResourceGenerateDesc`, :py:obj:`~.cudaStreamGetDevResource` """ + cdef cydriver.CUdevResourceType cytypename + cdef CUdevResource resource cdef cydriver.CUstream cyhStream if hStream is None: phStream = 0 @@ -54580,8 +55128,8 @@ def cuStreamGetDevResource(hStream, typename not None : CUdevResourceType): else: phStream = int(CUstream(hStream)) cyhStream = phStream - cdef CUdevResource resource = CUdevResource() - cdef cydriver.CUdevResourceType cytypename = int(typename) + resource = CUdevResource() + cytypename = int(typename) with nogil: err = cydriver.cuStreamGetDevResource(cyhStream, resource._pvt_ptr, cytypename) if err != cydriver.CUDA_SUCCESS: @@ -54623,32 +55171,35 @@ def cuLogsRegisterCallback(callbackFunc, userData): Optional location to store the callback handle after it is registered """ - cdef cydriver.CUlogsCallback cycallbackFunc - if callbackFunc is None: - pcallbackFunc = 0 - elif isinstance(callbackFunc, (CUlogsCallback,)): - pcallbackFunc = int(callbackFunc) - else: - pcallbackFunc = int(CUlogsCallback(callbackFunc)) - cycallbackFunc = pcallbackFunc + cdef CUlogsCallbackHandle callback_out cdef _HelperInputVoidPtrStruct cyuserDataHelper - cdef void* cyuserData = _helper_input_void_ptr(userData, &cyuserDataHelper) - + cdef void* cyuserData cdef cuLogsCallbackData *cbData = NULL - cbData = malloc(sizeof(cbData[0])) - if cbData == NULL: - return (CUresult.CUDA_ERROR_OUT_OF_MEMORY, None) - cbData.callback = cycallbackFunc - cbData.userData = cyuserData - - cdef CUlogsCallbackHandle callback_out = CUlogsCallbackHandle() - with nogil: - err = cydriver.cuLogsRegisterCallback(cuLogsCallbackWrapper, cbData, callback_out._pvt_ptr) - if err != cydriver.CUDA_SUCCESS: - free(cbData) - else: - m_global._allocated[int(callback_out)] = cbData - _helper_input_void_ptr_free(&cyuserDataHelper) + cdef cydriver.CUlogsCallback cycallbackFunc + try: + if callbackFunc is None: + pcallbackFunc = 0 + elif isinstance(callbackFunc, (CUlogsCallback,)): + pcallbackFunc = int(callbackFunc) + else: + pcallbackFunc = int(CUlogsCallback(callbackFunc)) + cycallbackFunc = pcallbackFunc + cyuserData = _helper_input_void_ptr(userData, &cyuserDataHelper) + cbData = malloc(sizeof(cbData[0])) + if cbData == NULL: + return (CUresult.CUDA_ERROR_OUT_OF_MEMORY, None) + cbData.callback = cycallbackFunc + cbData.userData = cyuserData + + callback_out = CUlogsCallbackHandle() + with nogil: + err = cydriver.cuLogsRegisterCallback(cuLogsCallbackWrapper, cbData, callback_out._pvt_ptr) + finally: + if err != cydriver.CUDA_SUCCESS: + free(cbData) + else: + m_global._allocated[int(callback_out)] = cbData + _helper_input_void_ptr_free(&cyuserDataHelper) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None) return (_CUresult_SUCCESS, callback_out) @@ -54671,18 +55222,20 @@ def cuLogsUnregisterCallback(callback): :py:obj:`~.CUDA_SUCCESS` :py:obj:`~.CUDA_ERROR_INVALID_VALUE` """ cdef cydriver.CUlogsCallbackHandle cycallback - if callback is None: - pcallback = 0 - elif isinstance(callback, (CUlogsCallbackHandle,)): - pcallback = int(callback) - else: - pcallback = int(CUlogsCallbackHandle(callback)) - cycallback = pcallback - with nogil: - err = cydriver.cuLogsUnregisterCallback(cycallback) - if err == cydriver.CUDA_SUCCESS: - free(m_global._allocated[pcallback]) - m_global._allocated.erase(pcallback) + try: + if callback is None: + pcallback = 0 + elif isinstance(callback, (CUlogsCallbackHandle,)): + pcallback = int(callback) + else: + pcallback = int(CUlogsCallbackHandle(callback)) + cycallback = pcallback + with nogil: + err = cydriver.cuLogsUnregisterCallback(cycallback) + finally: + if err == cydriver.CUDA_SUCCESS: + free(m_global._allocated[pcallback]) + m_global._allocated.erase(pcallback) return (_CUresult(err),) {{endif}} @@ -54704,7 +55257,8 @@ def cuLogsCurrent(unsigned int flags): iterator_out : :py:obj:`~.CUlogIterator` Location to store an iterator to the current tail of the logs """ - cdef CUlogIterator iterator_out = CUlogIterator() + cdef CUlogIterator iterator_out + iterator_out = CUlogIterator() with nogil: err = cydriver.cuLogsCurrent(iterator_out._pvt_ptr, flags) if err != cydriver.CUDA_SUCCESS: @@ -54898,7 +55452,8 @@ def cuCheckpointProcessLock(int pid, args : Optional[CUcheckpointLockArgs]): CUresult :py:obj:`~.CUDA_SUCCESS` :py:obj:`~.CUDA_ERROR_INVALID_VALUE` :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED` :py:obj:`~.CUDA_ERROR_ILLEGAL_STATE` :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED` :py:obj:`~.CUDA_ERROR_NOT_READY` """ - cdef cydriver.CUcheckpointLockArgs* cyargs_ptr = args._pvt_ptr if args is not None else NULL + cdef cydriver.CUcheckpointLockArgs* cyargs_ptr + cyargs_ptr = args._pvt_ptr if args is not None else NULL with nogil: err = cydriver.cuCheckpointProcessLock(pid, cyargs_ptr) return (_CUresult(err),) @@ -54929,7 +55484,8 @@ def cuCheckpointProcessCheckpoint(int pid, args : Optional[CUcheckpointCheckpoin CUresult :py:obj:`~.CUDA_SUCCESS` :py:obj:`~.CUDA_ERROR_INVALID_VALUE` :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED` :py:obj:`~.CUDA_ERROR_ILLEGAL_STATE` :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED` """ - cdef cydriver.CUcheckpointCheckpointArgs* cyargs_ptr = args._pvt_ptr if args is not None else NULL + cdef cydriver.CUcheckpointCheckpointArgs* cyargs_ptr + cyargs_ptr = args._pvt_ptr if args is not None else NULL with nogil: err = cydriver.cuCheckpointProcessCheckpoint(pid, cyargs_ptr) return (_CUresult(err),) @@ -54970,7 +55526,8 @@ def cuCheckpointProcessRestore(int pid, args : Optional[CUcheckpointRestoreArgs] -------- :py:obj:`~.cuInit` """ - cdef cydriver.CUcheckpointRestoreArgs* cyargs_ptr = args._pvt_ptr if args is not None else NULL + cdef cydriver.CUcheckpointRestoreArgs* cyargs_ptr + cyargs_ptr = args._pvt_ptr if args is not None else NULL with nogil: err = cydriver.cuCheckpointProcessRestore(pid, cyargs_ptr) return (_CUresult(err),) @@ -54999,7 +55556,8 @@ def cuCheckpointProcessUnlock(int pid, args : Optional[CUcheckpointUnlockArgs]): CUresult :py:obj:`~.CUDA_SUCCESS` :py:obj:`~.CUDA_ERROR_INVALID_VALUE` :py:obj:`~.CUDA_ERROR_NOT_INITIALIZED` :py:obj:`~.CUDA_ERROR_ILLEGAL_STATE` :py:obj:`~.CUDA_ERROR_NOT_SUPPORTED` """ - cdef cydriver.CUcheckpointUnlockArgs* cyargs_ptr = args._pvt_ptr if args is not None else NULL + cdef cydriver.CUcheckpointUnlockArgs* cyargs_ptr + cyargs_ptr = args._pvt_ptr if args is not None else NULL with nogil: err = cydriver.cuCheckpointProcessUnlock(pid, cyargs_ptr) return (_CUresult(err),) @@ -55122,6 +55680,8 @@ def cuGraphicsEGLRegisterImage(image, unsigned int flags): :py:obj:`~.cuGraphicsEGLRegisterImage`, :py:obj:`~.cuGraphicsUnregisterResource`, :py:obj:`~.cuGraphicsResourceSetMapFlags`, :py:obj:`~.cuGraphicsMapResources`, :py:obj:`~.cuGraphicsUnmapResources`, :py:obj:`~.cudaGraphicsEGLRegisterImage` """ cdef cydriver.EGLImageKHR cyimage + cdef CUgraphicsResource pCudaResource + pCudaResource = CUgraphicsResource() if image is None: pimage = 0 elif isinstance(image, (EGLImageKHR,)): @@ -55129,7 +55689,6 @@ def cuGraphicsEGLRegisterImage(image, unsigned int flags): else: pimage = int(EGLImageKHR(image)) cyimage = pimage - cdef CUgraphicsResource pCudaResource = CUgraphicsResource() with nogil: err = cydriver.cuGraphicsEGLRegisterImage(pCudaResource._pvt_ptr, cyimage, flags) if err != cydriver.CUDA_SUCCESS: @@ -55165,6 +55724,8 @@ def cuEGLStreamConsumerConnect(stream): :py:obj:`~.cuEGLStreamConsumerConnect`, :py:obj:`~.cuEGLStreamConsumerDisconnect`, :py:obj:`~.cuEGLStreamConsumerAcquireFrame`, :py:obj:`~.cuEGLStreamConsumerReleaseFrame`, :py:obj:`~.cudaEGLStreamConsumerConnect` """ cdef cydriver.EGLStreamKHR cystream + cdef CUeglStreamConnection conn + conn = CUeglStreamConnection() if stream is None: pstream = 0 elif isinstance(stream, (EGLStreamKHR,)): @@ -55172,7 +55733,6 @@ def cuEGLStreamConsumerConnect(stream): else: pstream = int(EGLStreamKHR(stream)) cystream = pstream - cdef CUeglStreamConnection conn = CUeglStreamConnection() with nogil: err = cydriver.cuEGLStreamConsumerConnect(conn._pvt_ptr, cystream) if err != cydriver.CUDA_SUCCESS: @@ -55212,6 +55772,8 @@ def cuEGLStreamConsumerConnectWithFlags(stream, unsigned int flags): :py:obj:`~.cuEGLStreamConsumerConnect`, :py:obj:`~.cuEGLStreamConsumerDisconnect`, :py:obj:`~.cuEGLStreamConsumerAcquireFrame`, :py:obj:`~.cuEGLStreamConsumerReleaseFrame`, :py:obj:`~.cudaEGLStreamConsumerConnectWithFlags` """ cdef cydriver.EGLStreamKHR cystream + cdef CUeglStreamConnection conn + conn = CUeglStreamConnection() if stream is None: pstream = 0 elif isinstance(stream, (EGLStreamKHR,)): @@ -55219,7 +55781,6 @@ def cuEGLStreamConsumerConnectWithFlags(stream, unsigned int flags): else: pstream = int(EGLStreamKHR(stream)) cystream = pstream - cdef CUeglStreamConnection conn = CUeglStreamConnection() with nogil: err = cydriver.cuEGLStreamConsumerConnectWithFlags(conn._pvt_ptr, cystream, flags) if err != cydriver.CUDA_SUCCESS: @@ -55302,25 +55863,7 @@ def cuEGLStreamConsumerAcquireFrame(conn, pCudaResource, pStream, unsigned int t :py:obj:`~.cuEGLStreamConsumerConnect`, :py:obj:`~.cuEGLStreamConsumerDisconnect`, :py:obj:`~.cuEGLStreamConsumerAcquireFrame`, :py:obj:`~.cuEGLStreamConsumerReleaseFrame`, :py:obj:`~.cudaEGLStreamConsumerAcquireFrame` """ cdef cydriver.CUstream *cypStream - if pStream is None: - cypStream = NULL - elif isinstance(pStream, (CUstream,)): - ppStream = pStream.getPtr() - cypStream = ppStream - elif isinstance(pStream, (int)): - cypStream = pStream - else: - raise TypeError("Argument 'pStream' is not instance of type (expected , found " + str(type(pStream))) cdef cydriver.CUgraphicsResource *cypCudaResource - if pCudaResource is None: - cypCudaResource = NULL - elif isinstance(pCudaResource, (CUgraphicsResource,)): - ppCudaResource = pCudaResource.getPtr() - cypCudaResource = ppCudaResource - elif isinstance(pCudaResource, (int)): - cypCudaResource = pCudaResource - else: - raise TypeError("Argument 'pCudaResource' is not instance of type (expected , found " + str(type(pCudaResource))) cdef cydriver.CUeglStreamConnection *cyconn if conn is None: cyconn = NULL @@ -55331,6 +55874,24 @@ def cuEGLStreamConsumerAcquireFrame(conn, pCudaResource, pStream, unsigned int t cyconn = conn else: raise TypeError("Argument 'conn' is not instance of type (expected , found " + str(type(conn))) + if pCudaResource is None: + cypCudaResource = NULL + elif isinstance(pCudaResource, (CUgraphicsResource,)): + ppCudaResource = pCudaResource.getPtr() + cypCudaResource = ppCudaResource + elif isinstance(pCudaResource, (int)): + cypCudaResource = pCudaResource + else: + raise TypeError("Argument 'pCudaResource' is not instance of type (expected , found " + str(type(pCudaResource))) + if pStream is None: + cypStream = NULL + elif isinstance(pStream, (CUstream,)): + ppStream = pStream.getPtr() + cypStream = ppStream + elif isinstance(pStream, (int)): + cypStream = pStream + else: + raise TypeError("Argument 'pStream' is not instance of type (expected , found " + str(type(pStream))) with nogil: err = cydriver.cuEGLStreamConsumerAcquireFrame(cyconn, cypCudaResource, cypStream, timeout) return (_CUresult(err),) @@ -55367,23 +55928,7 @@ def cuEGLStreamConsumerReleaseFrame(conn, pCudaResource, pStream): :py:obj:`~.cuEGLStreamConsumerConnect`, :py:obj:`~.cuEGLStreamConsumerDisconnect`, :py:obj:`~.cuEGLStreamConsumerAcquireFrame`, :py:obj:`~.cuEGLStreamConsumerReleaseFrame`, :py:obj:`~.cudaEGLStreamConsumerReleaseFrame` """ cdef cydriver.CUstream *cypStream - if pStream is None: - cypStream = NULL - elif isinstance(pStream, (CUstream,)): - ppStream = pStream.getPtr() - cypStream = ppStream - elif isinstance(pStream, (int)): - cypStream = pStream - else: - raise TypeError("Argument 'pStream' is not instance of type (expected , found " + str(type(pStream))) cdef cydriver.CUgraphicsResource cypCudaResource - if pCudaResource is None: - ppCudaResource = 0 - elif isinstance(pCudaResource, (CUgraphicsResource,)): - ppCudaResource = int(pCudaResource) - else: - ppCudaResource = int(CUgraphicsResource(pCudaResource)) - cypCudaResource = ppCudaResource cdef cydriver.CUeglStreamConnection *cyconn if conn is None: cyconn = NULL @@ -55394,6 +55939,22 @@ def cuEGLStreamConsumerReleaseFrame(conn, pCudaResource, pStream): cyconn = conn else: raise TypeError("Argument 'conn' is not instance of type (expected , found " + str(type(conn))) + if pCudaResource is None: + ppCudaResource = 0 + elif isinstance(pCudaResource, (CUgraphicsResource,)): + ppCudaResource = int(pCudaResource) + else: + ppCudaResource = int(CUgraphicsResource(pCudaResource)) + cypCudaResource = ppCudaResource + if pStream is None: + cypStream = NULL + elif isinstance(pStream, (CUstream,)): + ppStream = pStream.getPtr() + cypStream = ppStream + elif isinstance(pStream, (int)): + cypStream = pStream + else: + raise TypeError("Argument 'pStream' is not instance of type (expected , found " + str(type(pStream))) with nogil: err = cydriver.cuEGLStreamConsumerReleaseFrame(cyconn, cypCudaResource, cypStream) return (_CUresult(err),) @@ -55431,22 +55992,10 @@ def cuEGLStreamProducerConnect(stream, width, height): :py:obj:`~.cuEGLStreamProducerConnect`, :py:obj:`~.cuEGLStreamProducerDisconnect`, :py:obj:`~.cuEGLStreamProducerPresentFrame`, :py:obj:`~.cudaEGLStreamProducerConnect` """ cdef cydriver.EGLint cyheight - if height is None: - pheight = 0 - elif isinstance(height, (EGLint,)): - pheight = int(height) - else: - pheight = int(EGLint(height)) - cyheight = pheight cdef cydriver.EGLint cywidth - if width is None: - pwidth = 0 - elif isinstance(width, (EGLint,)): - pwidth = int(width) - else: - pwidth = int(EGLint(width)) - cywidth = pwidth cdef cydriver.EGLStreamKHR cystream + cdef CUeglStreamConnection conn + conn = CUeglStreamConnection() if stream is None: pstream = 0 elif isinstance(stream, (EGLStreamKHR,)): @@ -55454,7 +56003,20 @@ def cuEGLStreamProducerConnect(stream, width, height): else: pstream = int(EGLStreamKHR(stream)) cystream = pstream - cdef CUeglStreamConnection conn = CUeglStreamConnection() + if width is None: + pwidth = 0 + elif isinstance(width, (EGLint,)): + pwidth = int(width) + else: + pwidth = int(EGLint(width)) + cywidth = pwidth + if height is None: + pheight = 0 + elif isinstance(height, (EGLint,)): + pheight = int(height) + else: + pheight = int(EGLint(height)) + cyheight = pheight with nogil: err = cydriver.cuEGLStreamProducerConnect(conn._pvt_ptr, cystream, cywidth, cyheight) if err != cydriver.CUDA_SUCCESS: @@ -55545,15 +56107,6 @@ def cuEGLStreamProducerPresentFrame(conn, eglframe not None : CUeglFrame, pStrea :py:obj:`~.cuEGLStreamProducerConnect`, :py:obj:`~.cuEGLStreamProducerDisconnect`, :py:obj:`~.cuEGLStreamProducerReturnFrame`, :py:obj:`~.cudaEGLStreamProducerPresentFrame` """ cdef cydriver.CUstream *cypStream - if pStream is None: - cypStream = NULL - elif isinstance(pStream, (CUstream,)): - ppStream = pStream.getPtr() - cypStream = ppStream - elif isinstance(pStream, (int)): - cypStream = pStream - else: - raise TypeError("Argument 'pStream' is not instance of type (expected , found " + str(type(pStream))) cdef cydriver.CUeglStreamConnection *cyconn if conn is None: cyconn = NULL @@ -55564,6 +56117,15 @@ def cuEGLStreamProducerPresentFrame(conn, eglframe not None : CUeglFrame, pStrea cyconn = conn else: raise TypeError("Argument 'conn' is not instance of type (expected , found " + str(type(conn))) + if pStream is None: + cypStream = NULL + elif isinstance(pStream, (CUstream,)): + ppStream = pStream.getPtr() + cypStream = ppStream + elif isinstance(pStream, (int)): + cypStream = pStream + else: + raise TypeError("Argument 'pStream' is not instance of type (expected , found " + str(type(pStream))) with nogil: err = cydriver.cuEGLStreamProducerPresentFrame(cyconn, eglframe._pvt_ptr[0], cypStream) return (_CUresult(err),) @@ -55599,15 +56161,7 @@ def cuEGLStreamProducerReturnFrame(conn, eglframe : Optional[CUeglFrame], pStrea :py:obj:`~.cuEGLStreamProducerConnect`, :py:obj:`~.cuEGLStreamProducerDisconnect`, :py:obj:`~.cuEGLStreamProducerPresentFrame`, :py:obj:`~.cudaEGLStreamProducerReturnFrame` """ cdef cydriver.CUstream *cypStream - if pStream is None: - cypStream = NULL - elif isinstance(pStream, (CUstream,)): - ppStream = pStream.getPtr() - cypStream = ppStream - elif isinstance(pStream, (int)): - cypStream = pStream - else: - raise TypeError("Argument 'pStream' is not instance of type (expected , found " + str(type(pStream))) + cdef cydriver.CUeglFrame* cyeglframe_ptr cdef cydriver.CUeglStreamConnection *cyconn if conn is None: cyconn = NULL @@ -55618,7 +56172,16 @@ def cuEGLStreamProducerReturnFrame(conn, eglframe : Optional[CUeglFrame], pStrea cyconn = conn else: raise TypeError("Argument 'conn' is not instance of type (expected , found " + str(type(conn))) - cdef cydriver.CUeglFrame* cyeglframe_ptr = eglframe._pvt_ptr if eglframe is not None else NULL + cyeglframe_ptr = eglframe._pvt_ptr if eglframe is not None else NULL + if pStream is None: + cypStream = NULL + elif isinstance(pStream, (CUstream,)): + ppStream = pStream.getPtr() + cypStream = ppStream + elif isinstance(pStream, (int)): + cypStream = pStream + else: + raise TypeError("Argument 'pStream' is not instance of type (expected , found " + str(type(pStream))) with nogil: err = cydriver.cuEGLStreamProducerReturnFrame(cyconn, cyeglframe_ptr, cypStream) return (_CUresult(err),) @@ -55658,6 +56221,8 @@ def cuGraphicsResourceGetMappedEglFrame(resource, unsigned int index, unsigned i None """ cdef cydriver.CUgraphicsResource cyresource + cdef CUeglFrame eglFrame + eglFrame = CUeglFrame() if resource is None: presource = 0 elif isinstance(resource, (CUgraphicsResource,)): @@ -55665,7 +56230,6 @@ def cuGraphicsResourceGetMappedEglFrame(resource, unsigned int index, unsigned i else: presource = int(CUgraphicsResource(resource)) cyresource = presource - cdef CUeglFrame eglFrame = CUeglFrame() with nogil: err = cydriver.cuGraphicsResourceGetMappedEglFrame(eglFrame._pvt_ptr, cyresource, index, mipLevel) if err != cydriver.CUDA_SUCCESS: @@ -55717,6 +56281,8 @@ def cuEventCreateFromEGLSync(eglSync, unsigned int flags): :py:obj:`~.cuEventQuery`, :py:obj:`~.cuEventSynchronize`, :py:obj:`~.cuEventDestroy` """ cdef cydriver.EGLSyncKHR cyeglSync + cdef CUevent phEvent + phEvent = CUevent() if eglSync is None: peglSync = 0 elif isinstance(eglSync, (EGLSyncKHR,)): @@ -55724,7 +56290,6 @@ def cuEventCreateFromEGLSync(eglSync, unsigned int flags): else: peglSync = int(EGLSyncKHR(eglSync)) cyeglSync = peglSync - cdef CUevent phEvent = CUevent() with nogil: err = cydriver.cuEventCreateFromEGLSync(phEvent._pvt_ptr, cyeglSync, flags) if err != cydriver.CUDA_SUCCESS: @@ -55774,6 +56339,8 @@ def cuGraphicsGLRegisterBuffer(buffer, unsigned int Flags): :py:obj:`~.cuGraphicsUnregisterResource`, :py:obj:`~.cuGraphicsMapResources`, :py:obj:`~.cuGraphicsResourceGetMappedPointer`, :py:obj:`~.cudaGraphicsGLRegisterBuffer` """ cdef cydriver.GLuint cybuffer + cdef CUgraphicsResource pCudaResource + pCudaResource = CUgraphicsResource() if buffer is None: pbuffer = 0 elif isinstance(buffer, (GLuint,)): @@ -55781,7 +56348,6 @@ def cuGraphicsGLRegisterBuffer(buffer, unsigned int Flags): else: pbuffer = int(GLuint(buffer)) cybuffer = pbuffer - cdef CUgraphicsResource pCudaResource = CUgraphicsResource() with nogil: err = cydriver.cuGraphicsGLRegisterBuffer(pCudaResource._pvt_ptr, cybuffer, Flags) if err != cydriver.CUDA_SUCCESS: @@ -55866,14 +56432,9 @@ def cuGraphicsGLRegisterImage(image, target, unsigned int Flags): :py:obj:`~.cuGraphicsUnregisterResource`, :py:obj:`~.cuGraphicsMapResources`, :py:obj:`~.cuGraphicsSubResourceGetMappedArray`, :py:obj:`~.cudaGraphicsGLRegisterImage` """ cdef cydriver.GLenum cytarget - if target is None: - ptarget = 0 - elif isinstance(target, (GLenum,)): - ptarget = int(target) - else: - ptarget = int(GLenum(target)) - cytarget = ptarget cdef cydriver.GLuint cyimage + cdef CUgraphicsResource pCudaResource + pCudaResource = CUgraphicsResource() if image is None: pimage = 0 elif isinstance(image, (GLuint,)): @@ -55881,7 +56442,13 @@ def cuGraphicsGLRegisterImage(image, target, unsigned int Flags): else: pimage = int(GLuint(image)) cyimage = pimage - cdef CUgraphicsResource pCudaResource = CUgraphicsResource() + if target is None: + ptarget = 0 + elif isinstance(target, (GLenum,)): + ptarget = int(target) + else: + ptarget = int(GLenum(target)) + cytarget = ptarget with nogil: err = cydriver.cuGraphicsGLRegisterImage(pCudaResource._pvt_ptr, cyimage, cytarget, Flags) if err != cydriver.CUDA_SUCCESS: @@ -55939,20 +56506,23 @@ def cuGLGetDevices(unsigned int cudaDeviceCount, deviceList not None : CUGLDevic This function is not supported on Mac OS X. """ - cdef unsigned int pCudaDeviceCount = 0 + cdef cydriver.CUGLDeviceList cydeviceList cdef cydriver.CUdevice* cypCudaDevices = NULL pypCudaDevices = [] - if cudaDeviceCount != 0: - cypCudaDevices = calloc(cudaDeviceCount, sizeof(cydriver.CUdevice)) - if cypCudaDevices is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(cudaDeviceCount) + 'x' + str(sizeof(cydriver.CUdevice))) - cdef cydriver.CUGLDeviceList cydeviceList = int(deviceList) - with nogil: - err = cydriver.cuGLGetDevices(&pCudaDeviceCount, cypCudaDevices, cudaDeviceCount, cydeviceList) - if CUresult(err) == CUresult(0): - pypCudaDevices = [CUdevice(init_value=cypCudaDevices[idx]) for idx in range(cudaDeviceCount)] - if cypCudaDevices is not NULL: - free(cypCudaDevices) + cdef unsigned int pCudaDeviceCount = 0 + try: + if cudaDeviceCount != 0: + cypCudaDevices = calloc(cudaDeviceCount, sizeof(cydriver.CUdevice)) + if cypCudaDevices is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(cudaDeviceCount) + 'x' + str(sizeof(cydriver.CUdevice))) + cydeviceList = int(deviceList) + with nogil: + err = cydriver.cuGLGetDevices(&pCudaDeviceCount, cypCudaDevices, cudaDeviceCount, cydeviceList) + finally: + if CUresult(err) == CUresult(0): + pypCudaDevices = [CUdevice(init_value=cypCudaDevices[idx]) for idx in range(cudaDeviceCount)] + if cypCudaDevices is not NULL: + free(cypCudaDevices) if err != cydriver.CUDA_SUCCESS: return (_CUresult(err), None, None) return (_CUresult_SUCCESS, pCudaDeviceCount, pypCudaDevices) @@ -55986,6 +56556,16 @@ def cuVDPAUGetDevice(vdpDevice, vdpGetProcAddress): :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuVDPAUCtxCreate`, :py:obj:`~.cuGraphicsVDPAURegisterVideoSurface`, :py:obj:`~.cuGraphicsVDPAURegisterOutputSurface`, :py:obj:`~.cuGraphicsUnregisterResource`, :py:obj:`~.cuGraphicsResourceSetMapFlags`, :py:obj:`~.cuGraphicsMapResources`, :py:obj:`~.cuGraphicsUnmapResources`, :py:obj:`~.cuGraphicsSubResourceGetMappedArray`, :py:obj:`~.cudaVDPAUGetDevice` """ cdef cydriver.VdpGetProcAddress *cyvdpGetProcAddress + cdef cydriver.VdpDevice cyvdpDevice + cdef CUdevice pDevice + pDevice = CUdevice() + if vdpDevice is None: + pvdpDevice = 0 + elif isinstance(vdpDevice, (VdpDevice,)): + pvdpDevice = int(vdpDevice) + else: + pvdpDevice = int(VdpDevice(vdpDevice)) + cyvdpDevice = pvdpDevice if vdpGetProcAddress is None: cyvdpGetProcAddress = NULL elif isinstance(vdpGetProcAddress, (VdpGetProcAddress,)): @@ -55995,15 +56575,6 @@ def cuVDPAUGetDevice(vdpDevice, vdpGetProcAddress): cyvdpGetProcAddress = vdpGetProcAddress else: raise TypeError("Argument 'vdpGetProcAddress' is not instance of type (expected , found " + str(type(vdpGetProcAddress))) - cdef cydriver.VdpDevice cyvdpDevice - if vdpDevice is None: - pvdpDevice = 0 - elif isinstance(vdpDevice, (VdpDevice,)): - pvdpDevice = int(vdpDevice) - else: - pvdpDevice = int(VdpDevice(vdpDevice)) - cyvdpDevice = pvdpDevice - cdef CUdevice pDevice = CUdevice() with nogil: err = cydriver.cuVDPAUGetDevice(pDevice._pvt_ptr, cyvdpDevice, cyvdpGetProcAddress) if err != cydriver.CUDA_SUCCESS: @@ -56046,24 +56617,10 @@ def cuVDPAUCtxCreate(unsigned int flags, device, vdpDevice, vdpGetProcAddress): :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuGraphicsVDPAURegisterVideoSurface`, :py:obj:`~.cuGraphicsVDPAURegisterOutputSurface`, :py:obj:`~.cuGraphicsUnregisterResource`, :py:obj:`~.cuGraphicsResourceSetMapFlags`, :py:obj:`~.cuGraphicsMapResources`, :py:obj:`~.cuGraphicsUnmapResources`, :py:obj:`~.cuGraphicsSubResourceGetMappedArray`, :py:obj:`~.cuVDPAUGetDevice` """ cdef cydriver.VdpGetProcAddress *cyvdpGetProcAddress - if vdpGetProcAddress is None: - cyvdpGetProcAddress = NULL - elif isinstance(vdpGetProcAddress, (VdpGetProcAddress,)): - pvdpGetProcAddress = vdpGetProcAddress.getPtr() - cyvdpGetProcAddress = pvdpGetProcAddress - elif isinstance(vdpGetProcAddress, (int)): - cyvdpGetProcAddress = vdpGetProcAddress - else: - raise TypeError("Argument 'vdpGetProcAddress' is not instance of type (expected , found " + str(type(vdpGetProcAddress))) cdef cydriver.VdpDevice cyvdpDevice - if vdpDevice is None: - pvdpDevice = 0 - elif isinstance(vdpDevice, (VdpDevice,)): - pvdpDevice = int(vdpDevice) - else: - pvdpDevice = int(VdpDevice(vdpDevice)) - cyvdpDevice = pvdpDevice cdef cydriver.CUdevice cydevice + cdef CUcontext pCtx + pCtx = CUcontext() if device is None: pdevice = 0 elif isinstance(device, (CUdevice,)): @@ -56071,7 +56628,22 @@ def cuVDPAUCtxCreate(unsigned int flags, device, vdpDevice, vdpGetProcAddress): else: pdevice = int(CUdevice(device)) cydevice = pdevice - cdef CUcontext pCtx = CUcontext() + if vdpDevice is None: + pvdpDevice = 0 + elif isinstance(vdpDevice, (VdpDevice,)): + pvdpDevice = int(vdpDevice) + else: + pvdpDevice = int(VdpDevice(vdpDevice)) + cyvdpDevice = pvdpDevice + if vdpGetProcAddress is None: + cyvdpGetProcAddress = NULL + elif isinstance(vdpGetProcAddress, (VdpGetProcAddress,)): + pvdpGetProcAddress = vdpGetProcAddress.getPtr() + cyvdpGetProcAddress = pvdpGetProcAddress + elif isinstance(vdpGetProcAddress, (int)): + cyvdpGetProcAddress = vdpGetProcAddress + else: + raise TypeError("Argument 'vdpGetProcAddress' is not instance of type (expected , found " + str(type(vdpGetProcAddress))) with nogil: err = cydriver.cuVDPAUCtxCreate(pCtx._pvt_ptr, flags, cydevice, cyvdpDevice, cyvdpGetProcAddress) if err != cydriver.CUDA_SUCCESS: @@ -56127,6 +56699,8 @@ def cuGraphicsVDPAURegisterVideoSurface(vdpSurface, unsigned int flags): :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuVDPAUCtxCreate`, :py:obj:`~.cuGraphicsVDPAURegisterOutputSurface`, :py:obj:`~.cuGraphicsUnregisterResource`, :py:obj:`~.cuGraphicsResourceSetMapFlags`, :py:obj:`~.cuGraphicsMapResources`, :py:obj:`~.cuGraphicsUnmapResources`, :py:obj:`~.cuGraphicsSubResourceGetMappedArray`, :py:obj:`~.cuVDPAUGetDevice`, :py:obj:`~.cudaGraphicsVDPAURegisterVideoSurface` """ cdef cydriver.VdpVideoSurface cyvdpSurface + cdef CUgraphicsResource pCudaResource + pCudaResource = CUgraphicsResource() if vdpSurface is None: pvdpSurface = 0 elif isinstance(vdpSurface, (VdpVideoSurface,)): @@ -56134,7 +56708,6 @@ def cuGraphicsVDPAURegisterVideoSurface(vdpSurface, unsigned int flags): else: pvdpSurface = int(VdpVideoSurface(vdpSurface)) cyvdpSurface = pvdpSurface - cdef CUgraphicsResource pCudaResource = CUgraphicsResource() with nogil: err = cydriver.cuGraphicsVDPAURegisterVideoSurface(pCudaResource._pvt_ptr, cyvdpSurface, flags) if err != cydriver.CUDA_SUCCESS: @@ -56190,6 +56763,8 @@ def cuGraphicsVDPAURegisterOutputSurface(vdpSurface, unsigned int flags): :py:obj:`~.cuCtxCreate`, :py:obj:`~.cuVDPAUCtxCreate`, :py:obj:`~.cuGraphicsVDPAURegisterVideoSurface`, :py:obj:`~.cuGraphicsUnregisterResource`, :py:obj:`~.cuGraphicsResourceSetMapFlags`, :py:obj:`~.cuGraphicsMapResources`, :py:obj:`~.cuGraphicsUnmapResources`, :py:obj:`~.cuGraphicsSubResourceGetMappedArray`, :py:obj:`~.cuVDPAUGetDevice`, :py:obj:`~.cudaGraphicsVDPAURegisterOutputSurface` """ cdef cydriver.VdpOutputSurface cyvdpSurface + cdef CUgraphicsResource pCudaResource + pCudaResource = CUgraphicsResource() if vdpSurface is None: pvdpSurface = 0 elif isinstance(vdpSurface, (VdpOutputSurface,)): @@ -56197,7 +56772,6 @@ def cuGraphicsVDPAURegisterOutputSurface(vdpSurface, unsigned int flags): else: pvdpSurface = int(VdpOutputSurface(vdpSurface)) cyvdpSurface = pvdpSurface - cdef CUgraphicsResource pCudaResource = CUgraphicsResource() with nogil: err = cydriver.cuGraphicsVDPAURegisterOutputSurface(pCudaResource._pvt_ptr, cyvdpSurface, flags) if err != cydriver.CUDA_SUCCESS: diff --git a/cuda_bindings/cuda/bindings/nvrtc.pyx.in b/cuda_bindings/cuda/bindings/nvrtc.pyx.in index 3586d33f7a..7af23ccfdd 100644 --- a/cuda_bindings/cuda/bindings/nvrtc.pyx.in +++ b/cuda_bindings/cuda/bindings/nvrtc.pyx.in @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -# This code was automatically generated with version 13.1.0, generator version fd3f910. Do not modify it directly. +# This code was automatically generated with version 13.1.0, generator version f251d07. Do not modify it directly. from typing import Any, Optional import cython import ctypes @@ -150,7 +150,8 @@ def nvrtcGetErrorString(result not None : nvrtcResult): bytes Message string for the given :py:obj:`~.nvrtcResult` code. """ - cdef cynvrtc.nvrtcResult cyresult = int(result) + cdef cynvrtc.nvrtcResult cyresult + cyresult = int(result) with nogil: err = cynvrtc.nvrtcGetErrorString(cyresult) return (nvrtcResult.NVRTC_SUCCESS, err) @@ -172,8 +173,8 @@ def nvrtcVersion(): minor : int CUDA Runtime Compilation minor version number. """ - cdef int major = 0 cdef int minor = 0 + cdef int major = 0 with nogil: err = cynvrtc.nvrtcVersion(&major, &minor) if err != cynvrtc.NVRTC_SUCCESS: @@ -272,17 +273,20 @@ def nvrtcCreateProgram(char* src, char* name, int numHeaders, headers : Optional -------- :py:obj:`~.nvrtcDestroyProgram` """ - includeNames = [] if includeNames is None else includeNames - if not all(isinstance(_x, (bytes)) for _x in includeNames): - raise TypeError("Argument 'includeNames' is not instance of type (expected tuple[bytes] or list[bytes]") + cdef vector[const char*] cyincludeNames + cdef vector[const char*] cyheaders + cdef nvrtcProgram prog + prog = nvrtcProgram() + if numHeaders > len(headers): raise RuntimeError("List is too small: " + str(len(headers)) + " < " + str(numHeaders)) + if numHeaders > len(includeNames): raise RuntimeError("List is too small: " + str(len(includeNames)) + " < " + str(numHeaders)) headers = [] if headers is None else headers if not all(isinstance(_x, (bytes)) for _x in headers): raise TypeError("Argument 'headers' is not instance of type (expected tuple[bytes] or list[bytes]") - cdef nvrtcProgram prog = nvrtcProgram() - if numHeaders > len(headers): raise RuntimeError("List is too small: " + str(len(headers)) + " < " + str(numHeaders)) - if numHeaders > len(includeNames): raise RuntimeError("List is too small: " + str(len(includeNames)) + " < " + str(numHeaders)) - cdef vector[const char*] cyheaders = headers - cdef vector[const char*] cyincludeNames = includeNames + cyheaders = headers + includeNames = [] if includeNames is None else includeNames + if not all(isinstance(_x, (bytes)) for _x in includeNames): + raise TypeError("Argument 'includeNames' is not instance of type (expected tuple[bytes] or list[bytes]") + cyincludeNames = includeNames with nogil: err = cynvrtc.nvrtcCreateProgram(prog._pvt_ptr, src, name, numHeaders, cyheaders.data(), cyincludeNames.data()) if err != cynvrtc.NVRTC_SUCCESS: @@ -358,9 +362,7 @@ def nvrtcCompileProgram(prog, int numOptions, options : Optional[tuple[bytes] | - :py:obj:`~.NVRTC_ERROR_TIME_FILE_WRITE_FAILED` - :py:obj:`~.NVRTC_ERROR_CANCELLED` """ - options = [] if options is None else options - if not all(isinstance(_x, (bytes)) for _x in options): - raise TypeError("Argument 'options' is not instance of type (expected tuple[bytes] or list[bytes]") + cdef vector[const char*] cyoptions cdef cynvrtc.nvrtcProgram cyprog if prog is None: pprog = 0 @@ -370,7 +372,10 @@ def nvrtcCompileProgram(prog, int numOptions, options : Optional[tuple[bytes] | pprog = int(nvrtcProgram(prog)) cyprog = pprog if numOptions > len(options): raise RuntimeError("List is too small: " + str(len(options)) + " < " + str(numOptions)) - cdef vector[const char*] cyoptions = options + options = [] if options is None else options + if not all(isinstance(_x, (bytes)) for _x in options): + raise TypeError("Argument 'options' is not instance of type (expected tuple[bytes] or list[bytes]") + cyoptions = options with nogil: err = cynvrtc.nvrtcCompileProgram(cyprog, numOptions, cyoptions.data()) return (_nvrtcResult(err),) @@ -400,6 +405,7 @@ def nvrtcGetPTXSize(prog): -------- :py:obj:`~.nvrtcGetPTX` """ + cdef size_t ptxSizeRet = 0 cdef cynvrtc.nvrtcProgram cyprog if prog is None: pprog = 0 @@ -408,7 +414,6 @@ def nvrtcGetPTXSize(prog): else: pprog = int(nvrtcProgram(prog)) cyprog = pprog - cdef size_t ptxSizeRet = 0 with nogil: err = cynvrtc.nvrtcGetPTXSize(cyprog, &ptxSizeRet) if err != cynvrtc.NVRTC_SUCCESS: @@ -477,6 +482,7 @@ def nvrtcGetCUBINSize(prog): -------- :py:obj:`~.nvrtcGetCUBIN` """ + cdef size_t cubinSizeRet = 0 cdef cynvrtc.nvrtcProgram cyprog if prog is None: pprog = 0 @@ -485,7 +491,6 @@ def nvrtcGetCUBINSize(prog): else: pprog = int(nvrtcProgram(prog)) cyprog = pprog - cdef size_t cubinSizeRet = 0 with nogil: err = cynvrtc.nvrtcGetCUBINSize(cyprog, &cubinSizeRet) if err != cynvrtc.NVRTC_SUCCESS: @@ -554,6 +559,7 @@ def nvrtcGetLTOIRSize(prog): -------- :py:obj:`~.nvrtcGetLTOIR` """ + cdef size_t LTOIRSizeRet = 0 cdef cynvrtc.nvrtcProgram cyprog if prog is None: pprog = 0 @@ -562,7 +568,6 @@ def nvrtcGetLTOIRSize(prog): else: pprog = int(nvrtcProgram(prog)) cyprog = pprog - cdef size_t LTOIRSizeRet = 0 with nogil: err = cynvrtc.nvrtcGetLTOIRSize(cyprog, <OIRSizeRet) if err != cynvrtc.NVRTC_SUCCESS: @@ -631,6 +636,7 @@ def nvrtcGetOptiXIRSize(prog): -------- :py:obj:`~.nvrtcGetOptiXIR` """ + cdef size_t optixirSizeRet = 0 cdef cynvrtc.nvrtcProgram cyprog if prog is None: pprog = 0 @@ -639,7 +645,6 @@ def nvrtcGetOptiXIRSize(prog): else: pprog = int(nvrtcProgram(prog)) cyprog = pprog - cdef size_t optixirSizeRet = 0 with nogil: err = cynvrtc.nvrtcGetOptiXIRSize(cyprog, &optixirSizeRet) if err != cynvrtc.NVRTC_SUCCESS: @@ -711,6 +716,7 @@ def nvrtcGetProgramLogSize(prog): -------- :py:obj:`~.nvrtcGetProgramLog` """ + cdef size_t logSizeRet = 0 cdef cynvrtc.nvrtcProgram cyprog if prog is None: pprog = 0 @@ -719,7 +725,6 @@ def nvrtcGetProgramLogSize(prog): else: pprog = int(nvrtcProgram(prog)) cyprog = pprog - cdef size_t logSizeRet = 0 with nogil: err = cynvrtc.nvrtcGetProgramLogSize(cyprog, &logSizeRet) if err != cynvrtc.NVRTC_SUCCESS: @@ -835,6 +840,7 @@ def nvrtcGetLoweredName(prog, char* name_expression): -------- nvrtcAddNameExpression """ + cdef const char* lowered_name = NULL cdef cynvrtc.nvrtcProgram cyprog if prog is None: pprog = 0 @@ -843,7 +849,6 @@ def nvrtcGetLoweredName(prog, char* name_expression): else: pprog = int(nvrtcProgram(prog)) cyprog = pprog - cdef const char* lowered_name = NULL with nogil: err = cynvrtc.nvrtcGetLoweredName(cyprog, name_expression, &lowered_name) if err != cynvrtc.NVRTC_SUCCESS: @@ -967,6 +972,7 @@ def nvrtcGetPCHHeapSizeRequired(prog): pointer to location where the required size of the PCH Heap will be stored """ + cdef size_t size = 0 cdef cynvrtc.nvrtcProgram cyprog if prog is None: pprog = 0 @@ -975,7 +981,6 @@ def nvrtcGetPCHHeapSizeRequired(prog): else: pprog = int(nvrtcProgram(prog)) cyprog = pprog - cdef size_t size = 0 with nogil: err = cynvrtc.nvrtcGetPCHHeapSizeRequired(cyprog, &size) if err != cynvrtc.NVRTC_SUCCESS: @@ -1027,22 +1032,26 @@ def nvrtcSetFlowCallback(prog, callback, payload): - :py:obj:`~.NVRTC_ERROR_INVALID_PROGRAM` - :py:obj:`~.NVRTC_ERROR_INVALID_INPUT` """ - cdef cynvrtc.nvrtcProgram cyprog - if prog is None: - pprog = 0 - elif isinstance(prog, (nvrtcProgram,)): - pprog = int(prog) - else: - pprog = int(nvrtcProgram(prog)) - cyprog = pprog - cdef _HelperInputVoidPtrStruct cycallbackHelper - cdef void* cycallback = _helper_input_void_ptr(callback, &cycallbackHelper) cdef _HelperInputVoidPtrStruct cypayloadHelper - cdef void* cypayload = _helper_input_void_ptr(payload, &cypayloadHelper) - with nogil: - err = cynvrtc.nvrtcSetFlowCallback(cyprog, cycallback, cypayload) - _helper_input_void_ptr_free(&cycallbackHelper) - _helper_input_void_ptr_free(&cypayloadHelper) + cdef void* cypayload + cdef _HelperInputVoidPtrStruct cycallbackHelper + cdef void* cycallback + cdef cynvrtc.nvrtcProgram cyprog + try: + if prog is None: + pprog = 0 + elif isinstance(prog, (nvrtcProgram,)): + pprog = int(prog) + else: + pprog = int(nvrtcProgram(prog)) + cyprog = pprog + cycallback = _helper_input_void_ptr(callback, &cycallbackHelper) + cypayload = _helper_input_void_ptr(payload, &cypayloadHelper) + with nogil: + err = cynvrtc.nvrtcSetFlowCallback(cyprog, cycallback, cypayload) + finally: + _helper_input_void_ptr_free(&cycallbackHelper) + _helper_input_void_ptr_free(&cypayloadHelper) return (_nvrtcResult(err),) {{endif}} diff --git a/cuda_bindings/cuda/bindings/runtime.pyx.in b/cuda_bindings/cuda/bindings/runtime.pyx.in index 7d84a176f0..7c0caf0be9 100644 --- a/cuda_bindings/cuda/bindings/runtime.pyx.in +++ b/cuda_bindings/cuda/bindings/runtime.pyx.in @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -# This code was automatically generated with version 13.1.0, generator version fd3f910. Do not modify it directly. +# This code was automatically generated with version 13.1.0, generator version f251d07. Do not modify it directly. from typing import Any, Optional import cython import ctypes @@ -20725,7 +20725,8 @@ def cudaDeviceSetLimit(limit not None : cudaLimit, size_t value): -------- :py:obj:`~.cudaDeviceGetLimit`, :py:obj:`~.cuCtxSetLimit` """ - cdef cyruntime.cudaLimit cylimit = int(limit) + cdef cyruntime.cudaLimit cylimit + cylimit = int(limit) with nogil: err = cyruntime.cudaDeviceSetLimit(cylimit, value) return (_cudaError_t(err),) @@ -20782,8 +20783,9 @@ def cudaDeviceGetLimit(limit not None : cudaLimit): -------- :py:obj:`~.cudaDeviceSetLimit`, :py:obj:`~.cuCtxGetLimit` """ + cdef cyruntime.cudaLimit cylimit cdef size_t pValue = 0 - cdef cyruntime.cudaLimit cylimit = int(limit) + cylimit = int(limit) with nogil: err = cyruntime.cudaDeviceGetLimit(&pValue, cylimit) if err != cyruntime.cudaSuccess: @@ -20820,8 +20822,9 @@ def cudaDeviceGetTexture1DLinearMaxWidth(fmtDesc : Optional[cudaChannelFormatDes -------- :py:obj:`~.cuDeviceGetTexture1DLinearMaxWidth` """ + cdef cyruntime.cudaChannelFormatDesc* cyfmtDesc_ptr cdef size_t maxWidthInElements = 0 - cdef cyruntime.cudaChannelFormatDesc* cyfmtDesc_ptr = fmtDesc._pvt_ptr if fmtDesc is not None else NULL + cyfmtDesc_ptr = fmtDesc._pvt_ptr if fmtDesc is not None else NULL with nogil: err = cyruntime.cudaDeviceGetTexture1DLinearMaxWidth(&maxWidthInElements, cyfmtDesc_ptr, device) if err != cyruntime.cudaSuccess: @@ -20917,8 +20920,8 @@ def cudaDeviceGetStreamPriorityRange(): -------- :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaStreamGetPriority`, :py:obj:`~.cuCtxGetStreamPriorityRange` """ - cdef int leastPriority = 0 cdef int greatestPriority = 0 + cdef int leastPriority = 0 with nogil: err = cyruntime.cudaDeviceGetStreamPriorityRange(&leastPriority, &greatestPriority) if err != cyruntime.cudaSuccess: @@ -20978,7 +20981,8 @@ def cudaDeviceSetCacheConfig(cacheConfig not None : cudaFuncCache): -------- :py:obj:`~.cudaDeviceGetCacheConfig`, :py:obj:`~.cudaFuncSetCacheConfig (C API)`, cudaFuncSetCacheConfig (C++ API), :py:obj:`~.cuCtxSetCacheConfig` """ - cdef cyruntime.cudaFuncCache cycacheConfig = int(cacheConfig) + cdef cyruntime.cudaFuncCache cycacheConfig + cycacheConfig = int(cacheConfig) with nogil: err = cyruntime.cudaDeviceSetCacheConfig(cycacheConfig) return (_cudaError_t(err),) @@ -21051,8 +21055,9 @@ def cudaDeviceGetPCIBusId(int length, int device): -------- :py:obj:`~.cudaDeviceGetByPCIBusId`, :py:obj:`~.cuDeviceGetPCIBusId` """ + cdef char * pciBusId pypciBusId = b" " * length - cdef char* pciBusId = pypciBusId + pciBusId = pypciBusId with nogil: err = cyruntime.cudaDeviceGetPCIBusId(pciBusId, length, device) if err != cyruntime.cudaSuccess: @@ -21106,6 +21111,8 @@ def cudaIpcGetEventHandle(event): :py:obj:`~.cudaEventCreate`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaIpcOpenEventHandle`, :py:obj:`~.cudaIpcGetMemHandle`, :py:obj:`~.cudaIpcOpenMemHandle`, :py:obj:`~.cudaIpcCloseMemHandle`, :py:obj:`~.cuIpcGetEventHandle` """ cdef cyruntime.cudaEvent_t cyevent + cdef cudaIpcEventHandle_t handle + handle = cudaIpcEventHandle_t() if event is None: pevent = 0 elif isinstance(event, (cudaEvent_t,driver.CUevent)): @@ -21113,7 +21120,6 @@ def cudaIpcGetEventHandle(event): else: pevent = int(cudaEvent_t(event)) cyevent = pevent - cdef cudaIpcEventHandle_t handle = cudaIpcEventHandle_t() with nogil: err = cyruntime.cudaIpcGetEventHandle(handle._pvt_ptr, cyevent) if err != cyruntime.cudaSuccess: @@ -21160,7 +21166,8 @@ def cudaIpcOpenEventHandle(handle not None : cudaIpcEventHandle_t): -------- :py:obj:`~.cudaEventCreate`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaIpcGetEventHandle`, :py:obj:`~.cudaIpcGetMemHandle`, :py:obj:`~.cudaIpcOpenMemHandle`, :py:obj:`~.cudaIpcCloseMemHandle`, :py:obj:`~.cuIpcOpenEventHandle` """ - cdef cudaEvent_t event = cudaEvent_t() + cdef cudaEvent_t event + event = cudaEvent_t() with nogil: err = cyruntime.cudaIpcOpenEventHandle(event._pvt_ptr, handle._pvt_ptr[0]) if err != cyruntime.cudaSuccess: @@ -21208,12 +21215,16 @@ def cudaIpcGetMemHandle(devPtr): -------- :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaIpcGetEventHandle`, :py:obj:`~.cudaIpcOpenEventHandle`, :py:obj:`~.cudaIpcOpenMemHandle`, :py:obj:`~.cudaIpcCloseMemHandle`, :py:obj:`~.cuIpcGetMemHandle` """ - cdef cudaIpcMemHandle_t handle = cudaIpcMemHandle_t() cdef _HelperInputVoidPtrStruct cydevPtrHelper - cdef void* cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) - with nogil: - err = cyruntime.cudaIpcGetMemHandle(handle._pvt_ptr, cydevPtr) - _helper_input_void_ptr_free(&cydevPtrHelper) + cdef void* cydevPtr + cdef cudaIpcMemHandle_t handle + try: + handle = cudaIpcMemHandle_t() + cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) + with nogil: + err = cyruntime.cudaIpcGetMemHandle(handle._pvt_ptr, cydevPtr) + finally: + _helper_input_void_ptr_free(&cydevPtrHelper) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, handle) @@ -21329,10 +21340,13 @@ def cudaIpcCloseMemHandle(devPtr): :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaIpcGetEventHandle`, :py:obj:`~.cudaIpcOpenEventHandle`, :py:obj:`~.cudaIpcGetMemHandle`, :py:obj:`~.cudaIpcOpenMemHandle`, :py:obj:`~.cuIpcCloseMemHandle` """ cdef _HelperInputVoidPtrStruct cydevPtrHelper - cdef void* cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) - with nogil: - err = cyruntime.cudaIpcCloseMemHandle(cydevPtr) - _helper_input_void_ptr_free(&cydevPtrHelper) + cdef void* cydevPtr + try: + cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) + with nogil: + err = cyruntime.cudaIpcCloseMemHandle(cydevPtr) + finally: + _helper_input_void_ptr_free(&cydevPtrHelper) return (_cudaError_t(err),) {{endif}} @@ -21372,8 +21386,10 @@ def cudaDeviceFlushGPUDirectRDMAWrites(target not None : cudaFlushGPUDirectRDMAW -------- :py:obj:`~.cuFlushGPUDirectRDMAWrites` """ - cdef cyruntime.cudaFlushGPUDirectRDMAWritesTarget cytarget = int(target) - cdef cyruntime.cudaFlushGPUDirectRDMAWritesScope cyscope = int(scope) + cdef cyruntime.cudaFlushGPUDirectRDMAWritesScope cyscope + cdef cyruntime.cudaFlushGPUDirectRDMAWritesTarget cytarget + cytarget = int(target) + cyscope = int(scope) with nogil: err = cyruntime.cudaDeviceFlushGPUDirectRDMAWrites(cytarget, cyscope) return (_cudaError_t(err),) @@ -21436,36 +21452,39 @@ def cudaDeviceRegisterAsyncNotification(int device, callbackFunc, userData): -------- :py:obj:`~.cudaDeviceUnregisterAsyncNotification` """ - cdef cyruntime.cudaAsyncCallback cycallbackFunc - if callbackFunc is None: - pcallbackFunc = 0 - elif isinstance(callbackFunc, (cudaAsyncCallback,)): - pcallbackFunc = int(callbackFunc) - else: - pcallbackFunc = int(cudaAsyncCallback(callbackFunc)) - cycallbackFunc = pcallbackFunc + cdef cudaAsyncCallbackHandle_t callback cdef _HelperInputVoidPtrStruct cyuserDataHelper - cdef void* cyuserData = _helper_input_void_ptr(userData, &cyuserDataHelper) - + cdef void* cyuserData cdef cudaAsyncCallbackData *cbData = NULL - cbData = malloc(sizeof(cbData[0])) - if cbData == NULL: - return (cudaError_t.cudaErrorMemoryAllocation, None) - cbData.callback = cycallbackFunc - cbData.userData = cyuserData - - cdef cudaAsyncCallbackHandle_t callback = cudaAsyncCallbackHandle_t() - with nogil: - err = cyruntime.cudaDeviceRegisterAsyncNotification(device, cudaAsyncNotificationCallbackWrapper, cbData, callback._pvt_ptr) - if err != cyruntime.cudaSuccess: - free(cbData) - else: - m_global._allocated[int(callback)] = cbData - _helper_input_void_ptr_free(&cyuserDataHelper) + cdef cyruntime.cudaAsyncCallback cycallbackFunc + try: + if callbackFunc is None: + pcallbackFunc = 0 + elif isinstance(callbackFunc, (cudaAsyncCallback,)): + pcallbackFunc = int(callbackFunc) + else: + pcallbackFunc = int(cudaAsyncCallback(callbackFunc)) + cycallbackFunc = pcallbackFunc + cyuserData = _helper_input_void_ptr(userData, &cyuserDataHelper) + cbData = malloc(sizeof(cbData[0])) + if cbData == NULL: + return (cudaError_t.cudaErrorMemoryAllocation, None) + cbData.callback = cycallbackFunc + cbData.userData = cyuserData + + callback = cudaAsyncCallbackHandle_t() + with nogil: + err = cyruntime.cudaDeviceRegisterAsyncNotification(device, cudaAsyncNotificationCallbackWrapper, cbData, callback._pvt_ptr) + finally: + if err != cyruntime.cudaSuccess: + free(cbData) + else: + m_global._allocated[int(callback)] = cbData + _helper_input_void_ptr_free(&cyuserDataHelper) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) - return (_cudaError_t(err), callback) + return (_cudaError_t_SUCCESS, callback) {{endif}} {{if 'cudaDeviceUnregisterAsyncNotification' in found_functions}} @@ -21495,18 +21514,20 @@ def cudaDeviceUnregisterAsyncNotification(int device, callback): :py:obj:`~.cudaDeviceRegisterAsyncNotification` """ cdef cyruntime.cudaAsyncCallbackHandle_t cycallback - if callback is None: - pcallback = 0 - elif isinstance(callback, (cudaAsyncCallbackHandle_t,)): - pcallback = int(callback) - else: - pcallback = int(cudaAsyncCallbackHandle_t(callback)) - cycallback = pcallback - with nogil: - err = cyruntime.cudaDeviceUnregisterAsyncNotification(device, cycallback) - if err == cyruntime.cudaSuccess: - free(m_global._allocated[pcallback]) - m_global._allocated.erase(pcallback) + try: + if callback is None: + pcallback = 0 + elif isinstance(callback, (cudaAsyncCallbackHandle_t,)): + pcallback = int(callback) + else: + pcallback = int(cudaAsyncCallbackHandle_t(callback)) + cycallback = pcallback + with nogil: + err = cyruntime.cudaDeviceUnregisterAsyncNotification(device, cycallback) + finally: + if err == cyruntime.cudaSuccess: + free(m_global._allocated[pcallback]) + m_global._allocated.erase(pcallback) return (_cudaError_t(err),) {{endif}} @@ -21605,7 +21626,8 @@ def cudaDeviceSetSharedMemConfig(config not None : cudaSharedMemConfig): -------- :py:obj:`~.cudaDeviceSetCacheConfig`, :py:obj:`~.cudaDeviceGetCacheConfig`, :py:obj:`~.cudaDeviceGetSharedMemConfig`, :py:obj:`~.cudaFuncSetCacheConfig`, :py:obj:`~.cuCtxSetSharedMemConfig` """ - cdef cyruntime.cudaSharedMemConfig cyconfig = int(config) + cdef cyruntime.cudaSharedMemConfig cyconfig + cyconfig = int(config) with nogil: err = cyruntime.cudaDeviceSetSharedMemConfig(cyconfig) return (_cudaError_t(err),) @@ -21694,7 +21716,8 @@ def cudaGetErrorName(error not None : cudaError_t): -------- :py:obj:`~.cudaGetErrorString`, :py:obj:`~.cudaGetLastError`, :py:obj:`~.cudaPeekAtLastError`, :py:obj:`~.cudaError`, :py:obj:`~.cuGetErrorName` """ - cdef cyruntime.cudaError_t cyerror = int(error) + cdef cyruntime.cudaError_t cyerror + cyerror = int(error) with nogil: err = cyruntime.cudaGetErrorName(cyerror) return (cudaError_t.cudaSuccess, err) @@ -21725,7 +21748,8 @@ def cudaGetErrorString(error not None : cudaError_t): -------- :py:obj:`~.cudaGetErrorName`, :py:obj:`~.cudaGetLastError`, :py:obj:`~.cudaPeekAtLastError`, :py:obj:`~.cudaError`, :py:obj:`~.cuGetErrorString` """ - cdef cyruntime.cudaError_t cyerror = int(error) + cdef cyruntime.cudaError_t cyerror + cyerror = int(error) with nogil: err = cyruntime.cudaGetErrorString(cyerror) return (cudaError_t.cudaSuccess, err) @@ -21784,7 +21808,8 @@ def cudaGetDeviceProperties(int device): -------- :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cudaDeviceGetAttribute`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetName` """ - cdef cudaDeviceProp prop = cudaDeviceProp() + cdef cudaDeviceProp prop + prop = cudaDeviceProp() with nogil: err = cyruntime.cudaGetDeviceProperties(prop._pvt_ptr, device) if err != cyruntime.cudaSuccess: @@ -21819,8 +21844,9 @@ def cudaDeviceGetAttribute(attr not None : cudaDeviceAttr, int device): -------- :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cuDeviceGetAttribute` """ + cdef cyruntime.cudaDeviceAttr cyattr cdef int value = 0 - cdef cyruntime.cudaDeviceAttr cyattr = int(attr) + cyattr = int(attr) with nogil: err = cyruntime.cudaDeviceGetAttribute(&value, cyattr, device) if err != cyruntime.cudaSuccess: @@ -21869,23 +21895,26 @@ def cudaDeviceGetHostAtomicCapabilities(operations : Optional[tuple[cudaAtomicOp -------- :py:obj:`~.cudaDeviceGetAttribute`, :py:obj:`~.cudaDeviceGetP2PAtomicCapabilities`, :py:obj:`~.cuDeviceGeHostAtomicCapabilities` """ - operations = [] if operations is None else operations - if not all(isinstance(_x, (cudaAtomicOperation)) for _x in operations): - raise TypeError("Argument 'operations' is not instance of type (expected tuple[cyruntime.cudaAtomicOperation] or list[cyruntime.cudaAtomicOperation]") + cdef vector[cyruntime.cudaAtomicOperation] cyoperations cdef unsigned int* cycapabilities = NULL pycapabilities = [] - if count != 0: - cycapabilities = calloc(count, sizeof(unsigned int)) - if cycapabilities is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(count) + 'x' + str(sizeof(unsigned int))) - cdef vector[cyruntime.cudaAtomicOperation] cyoperations = [int(pyoperations) for pyoperations in (operations)] - if count > len(operations): raise RuntimeError("List is too small: " + str(len(operations)) + " < " + str(count)) - with nogil: - err = cyruntime.cudaDeviceGetHostAtomicCapabilities(cycapabilities, cyoperations.data(), count, device) - if cudaError_t(err) == cudaError_t(0): - pycapabilities = [cycapabilities[idx] for idx in range(count)] - if cycapabilities is not NULL: - free(cycapabilities) + try: + if count != 0: + cycapabilities = calloc(count, sizeof(unsigned int)) + if cycapabilities is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(count) + 'x' + str(sizeof(unsigned int))) + operations = [] if operations is None else operations + if not all(isinstance(_x, (cudaAtomicOperation)) for _x in operations): + raise TypeError("Argument 'operations' is not instance of type (expected tuple[cyruntime.cudaAtomicOperation] or list[cyruntime.cudaAtomicOperation]") + cyoperations = operations + if count > len(operations): raise RuntimeError("List is too small: " + str(len(operations)) + " < " + str(count)) + with nogil: + err = cyruntime.cudaDeviceGetHostAtomicCapabilities(cycapabilities, cyoperations.data(), count, device) + finally: + if cudaError_t(err) == cudaError_t(0): + pycapabilities = [cycapabilities[idx] for idx in range(count)] + if cycapabilities is not NULL: + free(cycapabilities) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, pycapabilities) @@ -21916,7 +21945,8 @@ def cudaDeviceGetDefaultMemPool(int device): -------- :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaMemPoolTrimTo`, :py:obj:`~.cudaMemPoolGetAttribute`, :py:obj:`~.cudaDeviceSetMemPool`, :py:obj:`~.cudaMemPoolSetAttribute`, :py:obj:`~.cudaMemPoolSetAccess` """ - cdef cudaMemPool_t memPool = cudaMemPool_t() + cdef cudaMemPool_t memPool + memPool = cudaMemPool_t() with nogil: err = cyruntime.cudaDeviceGetDefaultMemPool(memPool._pvt_ptr, device) if err != cyruntime.cudaSuccess: @@ -21998,7 +22028,8 @@ def cudaDeviceGetMemPool(int device): -------- :py:obj:`~.cuDeviceGetMemPool`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaDeviceSetMemPool` """ - cdef cudaMemPool_t memPool = cudaMemPool_t() + cdef cudaMemPool_t memPool + memPool = cudaMemPool_t() with nogil: err = cyruntime.cudaDeviceGetMemPool(memPool._pvt_ptr, device) if err != cyruntime.cudaSuccess: @@ -22086,10 +22117,13 @@ def cudaDeviceGetNvSciSyncAttributes(nvSciSyncAttrList, int device, int flags): :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaDestroyExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync` """ cdef _HelperInputVoidPtrStruct cynvSciSyncAttrListHelper - cdef void* cynvSciSyncAttrList = _helper_input_void_ptr(nvSciSyncAttrList, &cynvSciSyncAttrListHelper) - with nogil: - err = cyruntime.cudaDeviceGetNvSciSyncAttributes(cynvSciSyncAttrList, device, flags) - _helper_input_void_ptr_free(&cynvSciSyncAttrListHelper) + cdef void* cynvSciSyncAttrList + try: + cynvSciSyncAttrList = _helper_input_void_ptr(nvSciSyncAttrList, &cynvSciSyncAttrListHelper) + with nogil: + err = cyruntime.cudaDeviceGetNvSciSyncAttributes(cynvSciSyncAttrList, device, flags) + finally: + _helper_input_void_ptr_free(&cynvSciSyncAttrListHelper) return (_cudaError_t(err),) {{endif}} @@ -22149,8 +22183,9 @@ def cudaDeviceGetP2PAttribute(attr not None : cudaDeviceP2PAttr, int srcDevice, -------- :py:obj:`~.cudaDeviceEnablePeerAccess`, :py:obj:`~.cudaDeviceDisablePeerAccess`, :py:obj:`~.cudaDeviceCanAccessPeer`, :py:obj:`~.cuDeviceGetP2PAttribute` :py:obj:`~.cudaDeviceGetP2PAtomicCapabilities` """ + cdef cyruntime.cudaDeviceP2PAttr cyattr cdef int value = 0 - cdef cyruntime.cudaDeviceP2PAttr cyattr = int(attr) + cyattr = int(attr) with nogil: err = cyruntime.cudaDeviceGetP2PAttribute(&value, cyattr, srcDevice, dstDevice) if err != cyruntime.cudaSuccess: @@ -22203,23 +22238,26 @@ def cudaDeviceGetP2PAtomicCapabilities(operations : Optional[tuple[cudaAtomicOpe -------- :py:obj:`~.cudaDeviceGetP2PAttribute`, :py:obj:`~.cuDeviceGetP2PAttribute`, :py:obj:`~.cuDeviceGetP2PAtomicCapabilities` """ - operations = [] if operations is None else operations - if not all(isinstance(_x, (cudaAtomicOperation)) for _x in operations): - raise TypeError("Argument 'operations' is not instance of type (expected tuple[cyruntime.cudaAtomicOperation] or list[cyruntime.cudaAtomicOperation]") + cdef vector[cyruntime.cudaAtomicOperation] cyoperations cdef unsigned int* cycapabilities = NULL pycapabilities = [] - if count != 0: - cycapabilities = calloc(count, sizeof(unsigned int)) - if cycapabilities is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(count) + 'x' + str(sizeof(unsigned int))) - cdef vector[cyruntime.cudaAtomicOperation] cyoperations = [int(pyoperations) for pyoperations in (operations)] - if count > len(operations): raise RuntimeError("List is too small: " + str(len(operations)) + " < " + str(count)) - with nogil: - err = cyruntime.cudaDeviceGetP2PAtomicCapabilities(cycapabilities, cyoperations.data(), count, srcDevice, dstDevice) - if cudaError_t(err) == cudaError_t(0): - pycapabilities = [cycapabilities[idx] for idx in range(count)] - if cycapabilities is not NULL: - free(cycapabilities) + try: + if count != 0: + cycapabilities = calloc(count, sizeof(unsigned int)) + if cycapabilities is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(count) + 'x' + str(sizeof(unsigned int))) + operations = [] if operations is None else operations + if not all(isinstance(_x, (cudaAtomicOperation)) for _x in operations): + raise TypeError("Argument 'operations' is not instance of type (expected tuple[cyruntime.cudaAtomicOperation] or list[cyruntime.cudaAtomicOperation]") + cyoperations = operations + if count > len(operations): raise RuntimeError("List is too small: " + str(len(operations)) + " < " + str(count)) + with nogil: + err = cyruntime.cudaDeviceGetP2PAtomicCapabilities(cycapabilities, cyoperations.data(), count, srcDevice, dstDevice) + finally: + if cudaError_t(err) == cudaError_t(0): + pycapabilities = [cycapabilities[idx] for idx in range(count)] + if cycapabilities is not NULL: + free(cycapabilities) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, pycapabilities) @@ -22250,8 +22288,9 @@ def cudaChooseDevice(prop : Optional[cudaDeviceProp]): -------- :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaInitDevice` """ + cdef cyruntime.cudaDeviceProp* cyprop_ptr cdef int device = 0 - cdef cyruntime.cudaDeviceProp* cyprop_ptr = prop._pvt_ptr if prop is not None else NULL + cyprop_ptr = prop._pvt_ptr if prop is not None else NULL with nogil: err = cyruntime.cudaChooseDevice(&device, cyprop_ptr) if err != cyruntime.cudaSuccess: @@ -22545,7 +22584,8 @@ def cudaStreamCreate(): -------- :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamGetPriority`, :py:obj:`~.cudaStreamGetFlags`, :py:obj:`~.cudaStreamGetDevice`, :py:obj:`~.cudaStreamGetDevResource`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cuStreamCreate` """ - cdef cudaStream_t pStream = cudaStream_t() + cdef cudaStream_t pStream + pStream = cudaStream_t() with nogil: err = cyruntime.cudaStreamCreate(pStream._pvt_ptr) if err != cyruntime.cudaSuccess: @@ -22589,7 +22629,8 @@ def cudaStreamCreateWithFlags(unsigned int flags): -------- :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaStreamGetFlags`, :py:obj:`~.cudaStreamGetDevice`, :py:obj:`~.cudaStreamGetDevResource`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cuStreamCreate` """ - cdef cudaStream_t pStream = cudaStream_t() + cdef cudaStream_t pStream + pStream = cudaStream_t() with nogil: err = cyruntime.cudaStreamCreateWithFlags(pStream._pvt_ptr, flags) if err != cyruntime.cudaSuccess: @@ -22650,7 +22691,8 @@ def cudaStreamCreateWithPriority(unsigned int flags, int priority): In the current implementation, only compute kernels launched in priority streams are affected by the stream's priority. Stream priorities have no effect on host-to-device and device-to-host memory operations. """ - cdef cudaStream_t pStream = cudaStream_t() + cdef cudaStream_t pStream + pStream = cudaStream_t() with nogil: err = cyruntime.cudaStreamCreateWithPriority(pStream._pvt_ptr, flags, priority) if err != cyruntime.cudaSuccess: @@ -22688,6 +22730,7 @@ def cudaStreamGetPriority(hStream): -------- :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaDeviceGetStreamPriorityRange`, :py:obj:`~.cudaStreamGetFlags`, :py:obj:`~.cudaStreamGetDevice`, :py:obj:`~.cudaStreamGetDevResource`, :py:obj:`~.cuStreamGetPriority` """ + cdef int priority = 0 cdef cyruntime.cudaStream_t cyhStream if hStream is None: phStream = 0 @@ -22696,7 +22739,6 @@ def cudaStreamGetPriority(hStream): else: phStream = int(cudaStream_t(hStream)) cyhStream = phStream - cdef int priority = 0 with nogil: err = cyruntime.cudaStreamGetPriority(cyhStream, &priority) if err != cyruntime.cudaSuccess: @@ -22730,6 +22772,7 @@ def cudaStreamGetFlags(hStream): -------- :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamGetPriority`, :py:obj:`~.cudaStreamGetDevice`, :py:obj:`~.cuStreamGetFlags` """ + cdef unsigned int flags = 0 cdef cyruntime.cudaStream_t cyhStream if hStream is None: phStream = 0 @@ -22738,7 +22781,6 @@ def cudaStreamGetFlags(hStream): else: phStream = int(cudaStream_t(hStream)) cyhStream = phStream - cdef unsigned int flags = 0 with nogil: err = cyruntime.cudaStreamGetFlags(cyhStream, &flags) if err != cyruntime.cudaSuccess: @@ -22786,6 +22828,7 @@ def cudaStreamGetId(hStream): -------- :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamGetPriority`, :py:obj:`~.cudaStreamGetFlags`, :py:obj:`~.cuStreamGetId` """ + cdef unsigned long long streamId = 0 cdef cyruntime.cudaStream_t cyhStream if hStream is None: phStream = 0 @@ -22794,7 +22837,6 @@ def cudaStreamGetId(hStream): else: phStream = int(cudaStream_t(hStream)) cyhStream = phStream - cdef unsigned long long streamId = 0 with nogil: err = cyruntime.cudaStreamGetId(cyhStream, &streamId) if err != cyruntime.cudaSuccess: @@ -22826,6 +22868,7 @@ def cudaStreamGetDevice(hStream): -------- :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamGetPriority`, :py:obj:`~.cudaStreamGetFlags`, :py:obj:`~.cuStreamGetId` """ + cdef int device = 0 cdef cyruntime.cudaStream_t cyhStream if hStream is None: phStream = 0 @@ -22834,7 +22877,6 @@ def cudaStreamGetDevice(hStream): else: phStream = int(cudaStream_t(hStream)) cyhStream = phStream - cdef int device = 0 with nogil: err = cyruntime.cudaStreamGetDevice(cyhStream, &device) if err != cyruntime.cudaSuccess: @@ -22891,13 +22933,6 @@ def cudaStreamCopyAttributes(dst, src): :py:obj:`~.cudaAccessPolicyWindow` """ cdef cyruntime.cudaStream_t cysrc - if src is None: - psrc = 0 - elif isinstance(src, (cudaStream_t,driver.CUstream)): - psrc = int(src) - else: - psrc = int(cudaStream_t(src)) - cysrc = psrc cdef cyruntime.cudaStream_t cydst if dst is None: pdst = 0 @@ -22906,6 +22941,13 @@ def cudaStreamCopyAttributes(dst, src): else: pdst = int(cudaStream_t(dst)) cydst = pdst + if src is None: + psrc = 0 + elif isinstance(src, (cudaStream_t,driver.CUstream)): + psrc = int(src) + else: + psrc = int(cudaStream_t(src)) + cysrc = psrc with nogil: err = cyruntime.cudaStreamCopyAttributes(cydst, cysrc) return (_cudaError_t(err),) @@ -22938,6 +22980,8 @@ def cudaStreamGetAttribute(hStream, attr not None : cudaStreamAttrID): -------- :py:obj:`~.cudaAccessPolicyWindow` """ + cdef cudaStreamAttrValue value_out + cdef cyruntime.cudaStreamAttrID cyattr cdef cyruntime.cudaStream_t cyhStream if hStream is None: phStream = 0 @@ -22946,8 +22990,8 @@ def cudaStreamGetAttribute(hStream, attr not None : cudaStreamAttrID): else: phStream = int(cudaStream_t(hStream)) cyhStream = phStream - cdef cyruntime.cudaStreamAttrID cyattr = int(attr) - cdef cudaStreamAttrValue value_out = cudaStreamAttrValue() + cyattr = int(attr) + value_out = cudaStreamAttrValue() with nogil: err = cyruntime.cudaStreamGetAttribute(cyhStream, cyattr, value_out._pvt_ptr) if err != cyruntime.cudaSuccess: @@ -22983,6 +23027,8 @@ def cudaStreamSetAttribute(hStream, attr not None : cudaStreamAttrID, value : Op -------- :py:obj:`~.cudaAccessPolicyWindow` """ + cdef cyruntime.cudaStreamAttrValue* cyvalue_ptr + cdef cyruntime.cudaStreamAttrID cyattr cdef cyruntime.cudaStream_t cyhStream if hStream is None: phStream = 0 @@ -22991,8 +23037,8 @@ def cudaStreamSetAttribute(hStream, attr not None : cudaStreamAttrID, value : Op else: phStream = int(cudaStream_t(hStream)) cyhStream = phStream - cdef cyruntime.cudaStreamAttrID cyattr = int(attr) - cdef cyruntime.cudaStreamAttrValue* cyvalue_ptr = value._pvt_ptr if value is not None else NULL + cyattr = int(attr) + cyvalue_ptr = value._pvt_ptr if value is not None else NULL with nogil: err = cyruntime.cudaStreamSetAttribute(cyhStream, cyattr, cyvalue_ptr) return (_cudaError_t(err),) @@ -23076,13 +23122,6 @@ def cudaStreamWaitEvent(stream, event, unsigned int flags): :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cuStreamWaitEvent` """ cdef cyruntime.cudaEvent_t cyevent - if event is None: - pevent = 0 - elif isinstance(event, (cudaEvent_t,driver.CUevent)): - pevent = int(event) - else: - pevent = int(cudaEvent_t(event)) - cyevent = pevent cdef cyruntime.cudaStream_t cystream if stream is None: pstream = 0 @@ -23091,6 +23130,13 @@ def cudaStreamWaitEvent(stream, event, unsigned int flags): else: pstream = int(cudaStream_t(stream)) cystream = pstream + if event is None: + pevent = 0 + elif isinstance(event, (cudaEvent_t,driver.CUevent)): + pevent = int(event) + else: + pevent = int(cudaEvent_t(event)) + cyevent = pevent with nogil: err = cyruntime.cudaStreamWaitEvent(cystream, cyevent, flags) return (_cudaError_t(err),) @@ -23180,37 +23226,39 @@ def cudaStreamAddCallback(stream, callback, userData, unsigned int flags): ----- This function is slated for eventual deprecation and removal. If you do not require the callback to execute in case of a device error, consider using :py:obj:`~.cudaLaunchHostFunc`. Additionally, this function is not supported with :py:obj:`~.cudaStreamBeginCapture` and :py:obj:`~.cudaStreamEndCapture`, unlike :py:obj:`~.cudaLaunchHostFunc`. """ - cdef cyruntime.cudaStreamCallback_t cycallback - if callback is None: - pcallback = 0 - elif isinstance(callback, (cudaStreamCallback_t,)): - pcallback = int(callback) - else: - pcallback = int(cudaStreamCallback_t(callback)) - cycallback = pcallback - cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream cdef _HelperInputVoidPtrStruct cyuserDataHelper - cdef void* cyuserData = _helper_input_void_ptr(userData, &cyuserDataHelper) - + cdef void* cyuserData cdef cudaStreamCallbackData *cbData = NULL - cbData = malloc(sizeof(cbData[0])) - if cbData == NULL: - return (cudaError_t.cudaErrorMemoryAllocation,) - cbData.callback = cycallback - cbData.userData = cyuserData - - with nogil: - err = cyruntime.cudaStreamAddCallback(cystream, cudaStreamRtCallbackWrapper, cbData, flags) - if err != cyruntime.cudaSuccess: - free(cbData) - _helper_input_void_ptr_free(&cyuserDataHelper) + cdef cyruntime.cudaStreamCallback_t cycallback + cdef cyruntime.cudaStream_t cystream + try: + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream + if callback is None: + pcallback = 0 + elif isinstance(callback, (cudaStreamCallback_t,)): + pcallback = int(callback) + else: + pcallback = int(cudaStreamCallback_t(callback)) + cycallback = pcallback + cyuserData = _helper_input_void_ptr(userData, &cyuserDataHelper) + cbData = malloc(sizeof(cbData[0])) + if cbData == NULL: + return (cudaError_t.cudaErrorMemoryAllocation,) + cbData.callback = cycallback + cbData.userData = cyuserData + + with nogil: + err = cyruntime.cudaStreamAddCallback(cystream, cudaStreamRtCallbackWrapper, cbData, flags) + finally: + if err != cyruntime.cudaSuccess: + free(cbData) + _helper_input_void_ptr_free(&cyuserDataHelper) return (_cudaError_t(err),) {{endif}} @@ -23389,19 +23437,22 @@ def cudaStreamAttachMemAsync(stream, devPtr, size_t length, unsigned int flags): -------- :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cudaMallocManaged`, :py:obj:`~.cuStreamAttachMemAsync` """ - cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream cdef _HelperInputVoidPtrStruct cydevPtrHelper - cdef void* cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) - with nogil: - err = cyruntime.cudaStreamAttachMemAsync(cystream, cydevPtr, length, flags) - _helper_input_void_ptr_free(&cydevPtrHelper) + cdef void* cydevPtr + cdef cyruntime.cudaStream_t cystream + try: + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream + cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) + with nogil: + err = cyruntime.cudaStreamAttachMemAsync(cystream, cydevPtr, length, flags) + finally: + _helper_input_void_ptr_free(&cydevPtrHelper) return (_cudaError_t(err),) {{endif}} @@ -23448,6 +23499,7 @@ def cudaStreamBeginCapture(stream, mode not None : cudaStreamCaptureMode): ----- Kernels captured using this API must not use texture and surface references. Reading or writing through any texture or surface reference is undefined behavior. This restriction does not apply to texture and surface objects. """ + cdef cyruntime.cudaStreamCaptureMode cymode cdef cyruntime.cudaStream_t cystream if stream is None: pstream = 0 @@ -23456,7 +23508,7 @@ def cudaStreamBeginCapture(stream, mode not None : cudaStreamCaptureMode): else: pstream = int(cudaStream_t(stream)) cystream = pstream - cdef cyruntime.cudaStreamCaptureMode cymode = int(mode) + cymode = int(mode) with nogil: err = cyruntime.cudaStreamBeginCapture(cystream, cymode) return (_cudaError_t(err),) @@ -23515,55 +23567,58 @@ def cudaStreamBeginCaptureToGraph(stream, graph, dependencies : Optional[tuple[c ----- Kernels captured using this API must not use texture and surface references. Reading or writing through any texture or surface reference is undefined behavior. This restriction does not apply to texture and surface objects. """ - dependencyData = [] if dependencyData is None else dependencyData - if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in dependencyData): - raise TypeError("Argument 'dependencyData' is not instance of type (expected tuple[cyruntime.cudaGraphEdgeData,] or list[cyruntime.cudaGraphEdgeData,]") - dependencies = [] if dependencies is None else dependencies - if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in dependencies): - raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") + cdef cyruntime.cudaStreamCaptureMode cymode + cdef cyruntime.cudaGraphEdgeData* cydependencyData = NULL + cdef cyruntime.cudaGraphNode_t* cydependencies = NULL cdef cyruntime.cudaGraph_t cygraph - if graph is None: - pgraph = 0 - elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): - pgraph = int(graph) - else: - pgraph = int(cudaGraph_t(graph)) - cygraph = pgraph cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream - cdef cyruntime.cudaGraphNode_t* cydependencies = NULL - if len(dependencies) > 1: - cydependencies = calloc(len(dependencies), sizeof(cyruntime.cudaGraphNode_t)) - if cydependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - else: - for idx in range(len(dependencies)): - cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] - elif len(dependencies) == 1: - cydependencies = (dependencies[0])._pvt_ptr - cdef cyruntime.cudaGraphEdgeData* cydependencyData = NULL - if len(dependencyData) > 1: - cydependencyData = calloc(len(dependencyData), sizeof(cyruntime.cudaGraphEdgeData)) - if cydependencyData is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData))) - for idx in range(len(dependencyData)): - string.memcpy(&cydependencyData[idx], (dependencyData[idx])._pvt_ptr, sizeof(cyruntime.cudaGraphEdgeData)) - elif len(dependencyData) == 1: - cydependencyData = (dependencyData[0])._pvt_ptr - if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) - cdef cyruntime.cudaStreamCaptureMode cymode = int(mode) - with nogil: - err = cyruntime.cudaStreamBeginCaptureToGraph(cystream, cygraph, cydependencies, cydependencyData, numDependencies, cymode) - if len(dependencies) > 1 and cydependencies is not NULL: - free(cydependencies) - if len(dependencyData) > 1 and cydependencyData is not NULL: - free(cydependencyData) + try: + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream + if graph is None: + pgraph = 0 + elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): + pgraph = int(graph) + else: + pgraph = int(cudaGraph_t(graph)) + cygraph = pgraph + dependencies = [] if dependencies is None else dependencies + if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in dependencies): + raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") + if len(dependencies) > 1: + cydependencies = calloc(len(dependencies), sizeof(cyruntime.cudaGraphNode_t)) + if cydependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + else: + for idx in range(len(dependencies)): + cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr + dependencyData = [] if dependencyData is None else dependencyData + if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in dependencyData): + raise TypeError("Argument 'dependencyData' is not instance of type (expected tuple[cyruntime.cudaGraphEdgeData,] or list[cyruntime.cudaGraphEdgeData,]") + if len(dependencyData) > 1: + cydependencyData = calloc(len(dependencyData), sizeof(cyruntime.cudaGraphEdgeData)) + if cydependencyData is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData))) + for idx in range(len(dependencyData)): + string.memcpy(&cydependencyData[idx], (dependencyData[idx])._pvt_ptr, sizeof(cyruntime.cudaGraphEdgeData)) + elif len(dependencyData) == 1: + cydependencyData = (dependencyData[0])._pvt_ptr + if numDependencies > len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies)) + cymode = int(mode) + with nogil: + err = cyruntime.cudaStreamBeginCaptureToGraph(cystream, cygraph, cydependencies, cydependencyData, numDependencies, cymode) + finally: + if len(dependencies) > 1 and cydependencies is not NULL: + free(cydependencies) + if len(dependencyData) > 1 and cydependencyData is not NULL: + free(cydependencyData) return (_cudaError_t(err),) {{endif}} @@ -23633,7 +23688,8 @@ def cudaThreadExchangeStreamCaptureMode(mode not None : cudaStreamCaptureMode): -------- :py:obj:`~.cudaStreamBeginCapture` """ - cdef cyruntime.cudaStreamCaptureMode cymode = int(mode) + cdef cyruntime.cudaStreamCaptureMode cymode + cymode = int(mode) with nogil: err = cyruntime.cudaThreadExchangeStreamCaptureMode(&cymode) if err != cyruntime.cudaSuccess: @@ -23673,6 +23729,7 @@ def cudaStreamEndCapture(stream): -------- :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamBeginCapture`, :py:obj:`~.cudaStreamIsCapturing`, :py:obj:`~.cudaGraphDestroy` """ + cdef cudaGraph_t pGraph cdef cyruntime.cudaStream_t cystream if stream is None: pstream = 0 @@ -23681,7 +23738,7 @@ def cudaStreamEndCapture(stream): else: pstream = int(cudaStream_t(stream)) cystream = pstream - cdef cudaGraph_t pGraph = cudaGraph_t() + pGraph = cudaGraph_t() with nogil: err = cyruntime.cudaStreamEndCapture(cystream, pGraph._pvt_ptr) if err != cyruntime.cudaSuccess: @@ -23735,6 +23792,7 @@ def cudaStreamIsCapturing(stream): -------- :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamBeginCapture`, :py:obj:`~.cudaStreamEndCapture` """ + cdef cyruntime.cudaStreamCaptureStatus pCaptureStatus cdef cyruntime.cudaStream_t cystream if stream is None: pstream = 0 @@ -23743,7 +23801,6 @@ def cudaStreamIsCapturing(stream): else: pstream = int(cudaStream_t(stream)) cystream = pstream - cdef cyruntime.cudaStreamCaptureStatus pCaptureStatus with nogil: err = cyruntime.cudaStreamIsCapturing(cystream, &pCaptureStatus) if err != cyruntime.cudaSuccess: @@ -23824,28 +23881,31 @@ def cudaStreamGetCaptureInfo(stream): -------- :py:obj:`~.cudaStreamBeginCapture`, :py:obj:`~.cudaStreamIsCapturing`, :py:obj:`~.cudaStreamUpdateCaptureDependencies` """ - cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream - cdef cyruntime.cudaStreamCaptureStatus captureStatus_out - cdef unsigned long long id_out = 0 - cdef cudaGraph_t graph_out = cudaGraph_t() - cdef const cyruntime.cudaGraphNode_t* cydependencies_out = NULL - pydependencies_out = [] + cdef size_t numDependencies_out = 0 cdef const cyruntime.cudaGraphEdgeData* cyedgeData_out = NULL pyedgeData_out = [] - cdef size_t numDependencies_out = 0 - with nogil: - err = cyruntime.cudaStreamGetCaptureInfo(cystream, &captureStatus_out, &id_out, graph_out._pvt_ptr, &cydependencies_out, &cyedgeData_out, &numDependencies_out) - if cudaError_t(err) == cudaError_t(0): - pydependencies_out = [cudaGraphNode_t(init_value=cydependencies_out[idx]) for idx in range(numDependencies_out)] - if cudaError_t(err) == cudaError_t(0): - pyedgeData_out = [cudaGraphEdgeData(_ptr=&cyedgeData_out[idx]) for idx in range(numDependencies_out)] + cdef const cyruntime.cudaGraphNode_t* cydependencies_out = NULL + pydependencies_out = [] + cdef cudaGraph_t graph_out + cdef unsigned long long id_out = 0 + cdef cyruntime.cudaStreamCaptureStatus captureStatus_out + cdef cyruntime.cudaStream_t cystream + try: + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream + graph_out = cudaGraph_t() + with nogil: + err = cyruntime.cudaStreamGetCaptureInfo(cystream, &captureStatus_out, &id_out, graph_out._pvt_ptr, &cydependencies_out, &cyedgeData_out, &numDependencies_out) + finally: + if cudaError_t(err) == cudaError_t(0): + pydependencies_out = [cudaGraphNode_t(init_value=cydependencies_out[idx]) for idx in range(numDependencies_out)] + if cudaError_t(err) == cudaError_t(0): + pyedgeData_out = [cudaGraphEdgeData(_ptr=&cyedgeData_out[idx]) for idx in range(numDependencies_out)] if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None, None, None, None, None, None) return (_cudaError_t_SUCCESS, cudaStreamCaptureStatus(captureStatus_out), id_out, graph_out, pydependencies_out, pyedgeData_out, numDependencies_out) @@ -23896,45 +23956,47 @@ def cudaStreamUpdateCaptureDependencies(stream, dependencies : Optional[tuple[cu -------- :py:obj:`~.cudaStreamBeginCapture`, :py:obj:`~.cudaStreamGetCaptureInfo`, """ - dependencyData = [] if dependencyData is None else dependencyData - if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in dependencyData): - raise TypeError("Argument 'dependencyData' is not instance of type (expected tuple[cyruntime.cudaGraphEdgeData,] or list[cyruntime.cudaGraphEdgeData,]") - dependencies = [] if dependencies is None else dependencies - if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in dependencies): - raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") - cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream - cdef cyruntime.cudaGraphNode_t* cydependencies = NULL - if len(dependencies) > 1: - cydependencies = calloc(len(dependencies), sizeof(cyruntime.cudaGraphNode_t)) - if cydependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - else: - for idx in range(len(dependencies)): - cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] - elif len(dependencies) == 1: - cydependencies = (dependencies[0])._pvt_ptr cdef cyruntime.cudaGraphEdgeData* cydependencyData = NULL - if len(dependencyData) > 1: - cydependencyData = calloc(len(dependencyData), sizeof(cyruntime.cudaGraphEdgeData)) - if cydependencyData is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData))) - for idx in range(len(dependencyData)): - string.memcpy(&cydependencyData[idx], (dependencyData[idx])._pvt_ptr, sizeof(cyruntime.cudaGraphEdgeData)) - elif len(dependencyData) == 1: - cydependencyData = (dependencyData[0])._pvt_ptr - with nogil: - err = cyruntime.cudaStreamUpdateCaptureDependencies(cystream, cydependencies, cydependencyData, numDependencies, flags) - if len(dependencies) > 1 and cydependencies is not NULL: - free(cydependencies) - if len(dependencyData) > 1 and cydependencyData is not NULL: - free(cydependencyData) + cdef cyruntime.cudaGraphNode_t* cydependencies = NULL + cdef cyruntime.cudaStream_t cystream + try: + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream + dependencies = [] if dependencies is None else dependencies + if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in dependencies): + raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") + if len(dependencies) > 1: + cydependencies = calloc(len(dependencies), sizeof(cyruntime.cudaGraphNode_t)) + if cydependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + else: + for idx in range(len(dependencies)): + cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] + elif len(dependencies) == 1: + cydependencies = (dependencies[0])._pvt_ptr + dependencyData = [] if dependencyData is None else dependencyData + if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in dependencyData): + raise TypeError("Argument 'dependencyData' is not instance of type (expected tuple[cyruntime.cudaGraphEdgeData,] or list[cyruntime.cudaGraphEdgeData,]") + if len(dependencyData) > 1: + cydependencyData = calloc(len(dependencyData), sizeof(cyruntime.cudaGraphEdgeData)) + if cydependencyData is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData))) + for idx in range(len(dependencyData)): + string.memcpy(&cydependencyData[idx], (dependencyData[idx])._pvt_ptr, sizeof(cyruntime.cudaGraphEdgeData)) + elif len(dependencyData) == 1: + cydependencyData = (dependencyData[0])._pvt_ptr + with nogil: + err = cyruntime.cudaStreamUpdateCaptureDependencies(cystream, cydependencies, cydependencyData, numDependencies, flags) + finally: + if len(dependencies) > 1 and cydependencies is not NULL: + free(cydependencies) + if len(dependencyData) > 1 and cydependencyData is not NULL: + free(cydependencyData) return (_cudaError_t(err),) {{endif}} @@ -23958,7 +24020,8 @@ def cudaEventCreate(): -------- cudaEventCreate (C++ API), :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cuEventCreate` """ - cdef cudaEvent_t event = cudaEvent_t() + cdef cudaEvent_t event + event = cudaEvent_t() with nogil: err = cyruntime.cudaEventCreate(event._pvt_ptr) if err != cyruntime.cudaSuccess: @@ -24010,7 +24073,8 @@ def cudaEventCreateWithFlags(unsigned int flags): -------- :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cuEventCreate` """ - cdef cudaEvent_t event = cudaEvent_t() + cdef cudaEvent_t event + event = cudaEvent_t() with nogil: err = cyruntime.cudaEventCreateWithFlags(event._pvt_ptr, flags) if err != cyruntime.cudaSuccess: @@ -24057,13 +24121,6 @@ def cudaEventRecord(event, stream): :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cuEventRecord` """ cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream cdef cyruntime.cudaEvent_t cyevent if event is None: pevent = 0 @@ -24072,6 +24129,13 @@ def cudaEventRecord(event, stream): else: pevent = int(cudaEvent_t(event)) cyevent = pevent + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream with nogil: err = cyruntime.cudaEventRecord(cyevent, cystream) return (_cudaError_t(err),) @@ -24125,13 +24189,6 @@ def cudaEventRecordWithFlags(event, stream, unsigned int flags): :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cuEventRecord`, """ cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream cdef cyruntime.cudaEvent_t cyevent if event is None: pevent = 0 @@ -24140,6 +24197,13 @@ def cudaEventRecordWithFlags(event, stream, unsigned int flags): else: pevent = int(cudaEvent_t(event)) cyevent = pevent + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream with nogil: err = cyruntime.cudaEventRecordWithFlags(cyevent, cystream, flags) return (_cudaError_t(err),) @@ -24325,14 +24389,8 @@ def cudaEventElapsedTime(start, end): :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cuEventElapsedTime` """ cdef cyruntime.cudaEvent_t cyend - if end is None: - pend = 0 - elif isinstance(end, (cudaEvent_t,driver.CUevent)): - pend = int(end) - else: - pend = int(cudaEvent_t(end)) - cyend = pend cdef cyruntime.cudaEvent_t cystart + cdef float ms = 0 if start is None: pstart = 0 elif isinstance(start, (cudaEvent_t,driver.CUevent)): @@ -24340,7 +24398,13 @@ def cudaEventElapsedTime(start, end): else: pstart = int(cudaEvent_t(start)) cystart = pstart - cdef float ms = 0 + if end is None: + pend = 0 + elif isinstance(end, (cudaEvent_t,driver.CUevent)): + pend = int(end) + else: + pend = int(cudaEvent_t(end)) + cyend = pend with nogil: err = cyruntime.cudaEventElapsedTime(&ms, cystart, cyend) if err != cyruntime.cudaSuccess: @@ -24493,8 +24557,10 @@ def cudaImportExternalMemory(memHandleDesc : Optional[cudaExternalMemoryHandleDe If the Vulkan memory imported into CUDA is mapped on the CPU then the application must use vkInvalidateMappedMemoryRanges/vkFlushMappedMemoryRanges as well as appropriate Vulkan pipeline barriers to maintain coherence between CPU and GPU. For more information on these APIs, please refer to "Synchronization and Cache Control" chapter from Vulkan specification. """ - cdef cudaExternalMemory_t extMem_out = cudaExternalMemory_t() - cdef cyruntime.cudaExternalMemoryHandleDesc* cymemHandleDesc_ptr = memHandleDesc._pvt_ptr if memHandleDesc is not None else NULL + cdef cyruntime.cudaExternalMemoryHandleDesc* cymemHandleDesc_ptr + cdef cudaExternalMemory_t extMem_out + extMem_out = cudaExternalMemory_t() + cymemHandleDesc_ptr = memHandleDesc._pvt_ptr if memHandleDesc is not None else NULL with nogil: err = cyruntime.cudaImportExternalMemory(extMem_out._pvt_ptr, cymemHandleDesc_ptr) if err != cyruntime.cudaSuccess: @@ -24553,7 +24619,9 @@ def cudaExternalMemoryGetMappedBuffer(extMem, bufferDesc : Optional[cudaExternal -------- :py:obj:`~.cudaImportExternalMemory`, :py:obj:`~.cudaDestroyExternalMemory`, :py:obj:`~.cudaExternalMemoryGetMappedMipmappedArray` """ + cdef cyruntime.cudaExternalMemoryBufferDesc* cybufferDesc_ptr cdef cyruntime.cudaExternalMemory_t cyextMem + cdef void_ptr devPtr = 0 if extMem is None: pextMem = 0 elif isinstance(extMem, (cudaExternalMemory_t,)): @@ -24561,8 +24629,7 @@ def cudaExternalMemoryGetMappedBuffer(extMem, bufferDesc : Optional[cudaExternal else: pextMem = int(cudaExternalMemory_t(extMem)) cyextMem = pextMem - cdef void_ptr devPtr = 0 - cdef cyruntime.cudaExternalMemoryBufferDesc* cybufferDesc_ptr = bufferDesc._pvt_ptr if bufferDesc is not None else NULL + cybufferDesc_ptr = bufferDesc._pvt_ptr if bufferDesc is not None else NULL with nogil: err = cyruntime.cudaExternalMemoryGetMappedBuffer(&devPtr, cyextMem, cybufferDesc_ptr) if err != cyruntime.cudaSuccess: @@ -24625,7 +24692,10 @@ def cudaExternalMemoryGetMappedMipmappedArray(extMem, mipmapDesc : Optional[cuda ----- If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is :py:obj:`~.cudaExternalMemoryHandleTypeNvSciBuf`, then :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc.numLevels` must not be greater than 1. """ + cdef cyruntime.cudaExternalMemoryMipmappedArrayDesc* cymipmapDesc_ptr cdef cyruntime.cudaExternalMemory_t cyextMem + cdef cudaMipmappedArray_t mipmap + mipmap = cudaMipmappedArray_t() if extMem is None: pextMem = 0 elif isinstance(extMem, (cudaExternalMemory_t,)): @@ -24633,8 +24703,7 @@ def cudaExternalMemoryGetMappedMipmappedArray(extMem, mipmapDesc : Optional[cuda else: pextMem = int(cudaExternalMemory_t(extMem)) cyextMem = pextMem - cdef cudaMipmappedArray_t mipmap = cudaMipmappedArray_t() - cdef cyruntime.cudaExternalMemoryMipmappedArrayDesc* cymipmapDesc_ptr = mipmapDesc._pvt_ptr if mipmapDesc is not None else NULL + cymipmapDesc_ptr = mipmapDesc._pvt_ptr if mipmapDesc is not None else NULL with nogil: err = cyruntime.cudaExternalMemoryGetMappedMipmappedArray(mipmap._pvt_ptr, cyextMem, cymipmapDesc_ptr) if err != cyruntime.cudaSuccess: @@ -24820,8 +24889,10 @@ def cudaImportExternalSemaphore(semHandleDesc : Optional[cudaExternalSemaphoreHa -------- :py:obj:`~.cudaDestroyExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync` """ - cdef cudaExternalSemaphore_t extSem_out = cudaExternalSemaphore_t() - cdef cyruntime.cudaExternalSemaphoreHandleDesc* cysemHandleDesc_ptr = semHandleDesc._pvt_ptr if semHandleDesc is not None else NULL + cdef cyruntime.cudaExternalSemaphoreHandleDesc* cysemHandleDesc_ptr + cdef cudaExternalSemaphore_t extSem_out + extSem_out = cudaExternalSemaphore_t() + cysemHandleDesc_ptr = semHandleDesc._pvt_ptr if semHandleDesc is not None else NULL with nogil: err = cyruntime.cudaImportExternalSemaphore(extSem_out._pvt_ptr, cysemHandleDesc_ptr) if err != cyruntime.cudaSuccess: @@ -24942,46 +25013,48 @@ def cudaSignalExternalSemaphoresAsync(extSemArray : Optional[tuple[cudaExternalS :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaDestroyExternalSemaphore`, :py:obj:`~.cudaWaitExternalSemaphoresAsync` """ cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream - paramsArray = [] if paramsArray is None else paramsArray - if not all(isinstance(_x, (cudaExternalSemaphoreSignalParams,)) for _x in paramsArray): - raise TypeError("Argument 'paramsArray' is not instance of type (expected tuple[cyruntime.cudaExternalSemaphoreSignalParams,] or list[cyruntime.cudaExternalSemaphoreSignalParams,]") - extSemArray = [] if extSemArray is None else extSemArray - if not all(isinstance(_x, (cudaExternalSemaphore_t,)) for _x in extSemArray): - raise TypeError("Argument 'extSemArray' is not instance of type (expected tuple[cyruntime.cudaExternalSemaphore_t,] or list[cyruntime.cudaExternalSemaphore_t,]") - cdef cyruntime.cudaExternalSemaphore_t* cyextSemArray = NULL - if len(extSemArray) > 1: - cyextSemArray = calloc(len(extSemArray), sizeof(cyruntime.cudaExternalSemaphore_t)) - if cyextSemArray is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(extSemArray)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphore_t))) - else: - for idx in range(len(extSemArray)): - cyextSemArray[idx] = (extSemArray[idx])._pvt_ptr[0] - elif len(extSemArray) == 1: - cyextSemArray = (extSemArray[0])._pvt_ptr cdef cyruntime.cudaExternalSemaphoreSignalParams* cyparamsArray = NULL - if len(paramsArray) > 1: - cyparamsArray = calloc(len(paramsArray), sizeof(cyruntime.cudaExternalSemaphoreSignalParams)) - if cyparamsArray is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramsArray)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphoreSignalParams))) - for idx in range(len(paramsArray)): - string.memcpy(&cyparamsArray[idx], (paramsArray[idx])._pvt_ptr, sizeof(cyruntime.cudaExternalSemaphoreSignalParams)) - elif len(paramsArray) == 1: - cyparamsArray = (paramsArray[0])._pvt_ptr - if numExtSems > len(extSemArray): raise RuntimeError("List is too small: " + str(len(extSemArray)) + " < " + str(numExtSems)) - if numExtSems > len(paramsArray): raise RuntimeError("List is too small: " + str(len(paramsArray)) + " < " + str(numExtSems)) - with nogil: - err = cyruntime.cudaSignalExternalSemaphoresAsync(cyextSemArray, cyparamsArray, numExtSems, cystream) - if len(extSemArray) > 1 and cyextSemArray is not NULL: - free(cyextSemArray) - if len(paramsArray) > 1 and cyparamsArray is not NULL: - free(cyparamsArray) + cdef cyruntime.cudaExternalSemaphore_t* cyextSemArray = NULL + try: + extSemArray = [] if extSemArray is None else extSemArray + if not all(isinstance(_x, (cudaExternalSemaphore_t,)) for _x in extSemArray): + raise TypeError("Argument 'extSemArray' is not instance of type (expected tuple[cyruntime.cudaExternalSemaphore_t,] or list[cyruntime.cudaExternalSemaphore_t,]") + if len(extSemArray) > 1: + cyextSemArray = calloc(len(extSemArray), sizeof(cyruntime.cudaExternalSemaphore_t)) + if cyextSemArray is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(extSemArray)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphore_t))) + else: + for idx in range(len(extSemArray)): + cyextSemArray[idx] = (extSemArray[idx])._pvt_ptr[0] + elif len(extSemArray) == 1: + cyextSemArray = (extSemArray[0])._pvt_ptr + paramsArray = [] if paramsArray is None else paramsArray + if not all(isinstance(_x, (cudaExternalSemaphoreSignalParams,)) for _x in paramsArray): + raise TypeError("Argument 'paramsArray' is not instance of type (expected tuple[cyruntime.cudaExternalSemaphoreSignalParams,] or list[cyruntime.cudaExternalSemaphoreSignalParams,]") + if len(paramsArray) > 1: + cyparamsArray = calloc(len(paramsArray), sizeof(cyruntime.cudaExternalSemaphoreSignalParams)) + if cyparamsArray is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramsArray)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphoreSignalParams))) + for idx in range(len(paramsArray)): + string.memcpy(&cyparamsArray[idx], (paramsArray[idx])._pvt_ptr, sizeof(cyruntime.cudaExternalSemaphoreSignalParams)) + elif len(paramsArray) == 1: + cyparamsArray = (paramsArray[0])._pvt_ptr + if numExtSems > len(extSemArray): raise RuntimeError("List is too small: " + str(len(extSemArray)) + " < " + str(numExtSems)) + if numExtSems > len(paramsArray): raise RuntimeError("List is too small: " + str(len(paramsArray)) + " < " + str(numExtSems)) + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream + with nogil: + err = cyruntime.cudaSignalExternalSemaphoresAsync(cyextSemArray, cyparamsArray, numExtSems, cystream) + finally: + if len(extSemArray) > 1 and cyextSemArray is not NULL: + free(cyextSemArray) + if len(paramsArray) > 1 and cyparamsArray is not NULL: + free(cyparamsArray) return (_cudaError_t(err),) {{endif}} @@ -25071,46 +25144,48 @@ def cudaWaitExternalSemaphoresAsync(extSemArray : Optional[tuple[cudaExternalSem :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaDestroyExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync` """ cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream - paramsArray = [] if paramsArray is None else paramsArray - if not all(isinstance(_x, (cudaExternalSemaphoreWaitParams,)) for _x in paramsArray): - raise TypeError("Argument 'paramsArray' is not instance of type (expected tuple[cyruntime.cudaExternalSemaphoreWaitParams,] or list[cyruntime.cudaExternalSemaphoreWaitParams,]") - extSemArray = [] if extSemArray is None else extSemArray - if not all(isinstance(_x, (cudaExternalSemaphore_t,)) for _x in extSemArray): - raise TypeError("Argument 'extSemArray' is not instance of type (expected tuple[cyruntime.cudaExternalSemaphore_t,] or list[cyruntime.cudaExternalSemaphore_t,]") - cdef cyruntime.cudaExternalSemaphore_t* cyextSemArray = NULL - if len(extSemArray) > 1: - cyextSemArray = calloc(len(extSemArray), sizeof(cyruntime.cudaExternalSemaphore_t)) - if cyextSemArray is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(extSemArray)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphore_t))) - else: - for idx in range(len(extSemArray)): - cyextSemArray[idx] = (extSemArray[idx])._pvt_ptr[0] - elif len(extSemArray) == 1: - cyextSemArray = (extSemArray[0])._pvt_ptr cdef cyruntime.cudaExternalSemaphoreWaitParams* cyparamsArray = NULL - if len(paramsArray) > 1: - cyparamsArray = calloc(len(paramsArray), sizeof(cyruntime.cudaExternalSemaphoreWaitParams)) - if cyparamsArray is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramsArray)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphoreWaitParams))) - for idx in range(len(paramsArray)): - string.memcpy(&cyparamsArray[idx], (paramsArray[idx])._pvt_ptr, sizeof(cyruntime.cudaExternalSemaphoreWaitParams)) - elif len(paramsArray) == 1: - cyparamsArray = (paramsArray[0])._pvt_ptr - if numExtSems > len(extSemArray): raise RuntimeError("List is too small: " + str(len(extSemArray)) + " < " + str(numExtSems)) - if numExtSems > len(paramsArray): raise RuntimeError("List is too small: " + str(len(paramsArray)) + " < " + str(numExtSems)) - with nogil: - err = cyruntime.cudaWaitExternalSemaphoresAsync(cyextSemArray, cyparamsArray, numExtSems, cystream) - if len(extSemArray) > 1 and cyextSemArray is not NULL: - free(cyextSemArray) - if len(paramsArray) > 1 and cyparamsArray is not NULL: - free(cyparamsArray) + cdef cyruntime.cudaExternalSemaphore_t* cyextSemArray = NULL + try: + extSemArray = [] if extSemArray is None else extSemArray + if not all(isinstance(_x, (cudaExternalSemaphore_t,)) for _x in extSemArray): + raise TypeError("Argument 'extSemArray' is not instance of type (expected tuple[cyruntime.cudaExternalSemaphore_t,] or list[cyruntime.cudaExternalSemaphore_t,]") + if len(extSemArray) > 1: + cyextSemArray = calloc(len(extSemArray), sizeof(cyruntime.cudaExternalSemaphore_t)) + if cyextSemArray is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(extSemArray)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphore_t))) + else: + for idx in range(len(extSemArray)): + cyextSemArray[idx] = (extSemArray[idx])._pvt_ptr[0] + elif len(extSemArray) == 1: + cyextSemArray = (extSemArray[0])._pvt_ptr + paramsArray = [] if paramsArray is None else paramsArray + if not all(isinstance(_x, (cudaExternalSemaphoreWaitParams,)) for _x in paramsArray): + raise TypeError("Argument 'paramsArray' is not instance of type (expected tuple[cyruntime.cudaExternalSemaphoreWaitParams,] or list[cyruntime.cudaExternalSemaphoreWaitParams,]") + if len(paramsArray) > 1: + cyparamsArray = calloc(len(paramsArray), sizeof(cyruntime.cudaExternalSemaphoreWaitParams)) + if cyparamsArray is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramsArray)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphoreWaitParams))) + for idx in range(len(paramsArray)): + string.memcpy(&cyparamsArray[idx], (paramsArray[idx])._pvt_ptr, sizeof(cyruntime.cudaExternalSemaphoreWaitParams)) + elif len(paramsArray) == 1: + cyparamsArray = (paramsArray[0])._pvt_ptr + if numExtSems > len(extSemArray): raise RuntimeError("List is too small: " + str(len(extSemArray)) + " < " + str(numExtSems)) + if numExtSems > len(paramsArray): raise RuntimeError("List is too small: " + str(len(paramsArray)) + " < " + str(numExtSems)) + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream + with nogil: + err = cyruntime.cudaWaitExternalSemaphoresAsync(cyextSemArray, cyparamsArray, numExtSems, cystream) + finally: + if len(extSemArray) > 1 and cyextSemArray is not NULL: + free(cyextSemArray) + if len(paramsArray) > 1 and cyparamsArray is not NULL: + free(cyparamsArray) return (_cudaError_t(err),) {{endif}} @@ -25210,12 +25285,16 @@ def cudaFuncSetCacheConfig(func, cacheConfig not None : cudaFuncCache): ----- This API does not accept a :py:obj:`~.cudaKernel_t` casted as void*. If cache config modification is required for a :py:obj:`~.cudaKernel_t` (or a global function), it can be replaced with a call to :py:obj:`~.cudaFuncSetAttributes` with the attribute :py:obj:`~.cudaFuncAttributePreferredSharedMemoryCarveout` to specify a more granular L1 cache and shared memory split configuration. """ + cdef cyruntime.cudaFuncCache cycacheConfig cdef _HelperInputVoidPtrStruct cyfuncHelper - cdef void* cyfunc = _helper_input_void_ptr(func, &cyfuncHelper) - cdef cyruntime.cudaFuncCache cycacheConfig = int(cacheConfig) - with nogil: - err = cyruntime.cudaFuncSetCacheConfig(cyfunc, cycacheConfig) - _helper_input_void_ptr_free(&cyfuncHelper) + cdef void* cyfunc + try: + cyfunc = _helper_input_void_ptr(func, &cyfuncHelper) + cycacheConfig = int(cacheConfig) + with nogil: + err = cyruntime.cudaFuncSetCacheConfig(cyfunc, cycacheConfig) + finally: + _helper_input_void_ptr_free(&cyfuncHelper) return (_cudaError_t(err),) {{endif}} @@ -25253,12 +25332,16 @@ def cudaFuncGetAttributes(func): -------- :py:obj:`~.cudaFuncSetCacheConfig (C API)`, cudaFuncGetAttributes (C++ API), :py:obj:`~.cudaLaunchKernel (C API)`, :py:obj:`~.cuFuncGetAttribute` """ - cdef cudaFuncAttributes attr = cudaFuncAttributes() cdef _HelperInputVoidPtrStruct cyfuncHelper - cdef void* cyfunc = _helper_input_void_ptr(func, &cyfuncHelper) - with nogil: - err = cyruntime.cudaFuncGetAttributes(attr._pvt_ptr, cyfunc) - _helper_input_void_ptr_free(&cyfuncHelper) + cdef void* cyfunc + cdef cudaFuncAttributes attr + try: + attr = cudaFuncAttributes() + cyfunc = _helper_input_void_ptr(func, &cyfuncHelper) + with nogil: + err = cyruntime.cudaFuncGetAttributes(attr._pvt_ptr, cyfunc) + finally: + _helper_input_void_ptr_free(&cyfuncHelper) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, attr) @@ -25342,12 +25425,16 @@ def cudaFuncSetAttribute(func, attr not None : cudaFuncAttribute, int value): cudaError_t :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorInvalidValue` """ + cdef cyruntime.cudaFuncAttribute cyattr cdef _HelperInputVoidPtrStruct cyfuncHelper - cdef void* cyfunc = _helper_input_void_ptr(func, &cyfuncHelper) - cdef cyruntime.cudaFuncAttribute cyattr = int(attr) - with nogil: - err = cyruntime.cudaFuncSetAttribute(cyfunc, cyattr, value) - _helper_input_void_ptr_free(&cyfuncHelper) + cdef void* cyfunc + try: + cyfunc = _helper_input_void_ptr(func, &cyfuncHelper) + cyattr = int(attr) + with nogil: + err = cyruntime.cudaFuncSetAttribute(cyfunc, cyattr, value) + finally: + _helper_input_void_ptr_free(&cyfuncHelper) return (_cudaError_t(err),) {{endif}} @@ -25428,37 +25515,39 @@ def cudaLaunchHostFunc(stream, fn, userData): -------- :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cudaMallocManaged`, :py:obj:`~.cudaStreamAttachMemAsync`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cuLaunchHostFunc` """ - cdef cyruntime.cudaHostFn_t cyfn - if fn is None: - pfn = 0 - elif isinstance(fn, (cudaHostFn_t,)): - pfn = int(fn) - else: - pfn = int(cudaHostFn_t(fn)) - cyfn = pfn - cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream cdef _HelperInputVoidPtrStruct cyuserDataHelper - cdef void* cyuserData = _helper_input_void_ptr(userData, &cyuserDataHelper) - + cdef void* cyuserData cdef cudaStreamHostCallbackData *cbData = NULL - cbData = malloc(sizeof(cbData[0])) - if cbData == NULL: - return (cudaError_t.cudaErrorMemoryAllocation,) - cbData.callback = cyfn - cbData.userData = cyuserData - - with nogil: - err = cyruntime.cudaLaunchHostFunc(cystream, cudaStreamRtHostCallbackWrapper, cbData) - if err != cyruntime.cudaSuccess: - free(cbData) - _helper_input_void_ptr_free(&cyuserDataHelper) + cdef cyruntime.cudaHostFn_t cyfn + cdef cyruntime.cudaStream_t cystream + try: + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream + if fn is None: + pfn = 0 + elif isinstance(fn, (cudaHostFn_t,)): + pfn = int(fn) + else: + pfn = int(cudaHostFn_t(fn)) + cyfn = pfn + cyuserData = _helper_input_void_ptr(userData, &cyuserDataHelper) + cbData = malloc(sizeof(cbData[0])) + if cbData == NULL: + return (cudaError_t.cudaErrorMemoryAllocation,) + cbData.callback = cyfn + cbData.userData = cyuserData + + with nogil: + err = cyruntime.cudaLaunchHostFunc(cystream, cudaStreamRtHostCallbackWrapper, cbData) + finally: + if err != cyruntime.cudaSuccess: + free(cbData) + _helper_input_void_ptr_free(&cyuserDataHelper) return (_cudaError_t(err),) {{endif}} @@ -25523,12 +25612,16 @@ def cudaFuncSetSharedMemConfig(func, config not None : cudaSharedMemConfig): -------- :py:obj:`~.cudaDeviceSetSharedMemConfig`, :py:obj:`~.cudaDeviceGetSharedMemConfig`, :py:obj:`~.cudaDeviceSetCacheConfig`, :py:obj:`~.cudaDeviceGetCacheConfig`, :py:obj:`~.cudaFuncSetCacheConfig`, :py:obj:`~.cuFuncSetSharedMemConfig` """ + cdef cyruntime.cudaSharedMemConfig cyconfig cdef _HelperInputVoidPtrStruct cyfuncHelper - cdef void* cyfunc = _helper_input_void_ptr(func, &cyfuncHelper) - cdef cyruntime.cudaSharedMemConfig cyconfig = int(config) - with nogil: - err = cyruntime.cudaFuncSetSharedMemConfig(cyfunc, cyconfig) - _helper_input_void_ptr_free(&cyfuncHelper) + cdef void* cyfunc + try: + cyfunc = _helper_input_void_ptr(func, &cyfuncHelper) + cyconfig = int(config) + with nogil: + err = cyruntime.cudaFuncSetSharedMemConfig(cyfunc, cyconfig) + finally: + _helper_input_void_ptr_free(&cyfuncHelper) return (_cudaError_t(err),) {{endif}} @@ -25561,12 +25654,15 @@ def cudaOccupancyMaxActiveBlocksPerMultiprocessor(func, int blockSize, size_t dy -------- :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`, cudaOccupancyMaxPotentialBlockSize (C++ API), cudaOccupancyMaxPotentialBlockSizeWithFlags (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMem (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags (C++ API), cudaOccupancyAvailableDynamicSMemPerBlock (C++ API), :py:obj:`~.cuOccupancyMaxActiveBlocksPerMultiprocessor` """ - cdef int numBlocks = 0 cdef _HelperInputVoidPtrStruct cyfuncHelper - cdef void* cyfunc = _helper_input_void_ptr(func, &cyfuncHelper) - with nogil: - err = cyruntime.cudaOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks, cyfunc, blockSize, dynamicSMemSize) - _helper_input_void_ptr_free(&cyfuncHelper) + cdef void* cyfunc + cdef int numBlocks = 0 + try: + cyfunc = _helper_input_void_ptr(func, &cyfuncHelper) + with nogil: + err = cyruntime.cudaOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks, cyfunc, blockSize, dynamicSMemSize) + finally: + _helper_input_void_ptr_free(&cyfuncHelper) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, numBlocks) @@ -25601,12 +25697,15 @@ def cudaOccupancyAvailableDynamicSMemPerBlock(func, int numBlocks, int blockSize -------- :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`, cudaOccupancyMaxPotentialBlockSize (C++ API), cudaOccupancyMaxPotentialBlockSizeWithFlags (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMem (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags (C++ API), :py:obj:`~.cudaOccupancyAvailableDynamicSMemPerBlock` """ - cdef size_t dynamicSmemSize = 0 cdef _HelperInputVoidPtrStruct cyfuncHelper - cdef void* cyfunc = _helper_input_void_ptr(func, &cyfuncHelper) - with nogil: - err = cyruntime.cudaOccupancyAvailableDynamicSMemPerBlock(&dynamicSmemSize, cyfunc, numBlocks, blockSize) - _helper_input_void_ptr_free(&cyfuncHelper) + cdef void* cyfunc + cdef size_t dynamicSmemSize = 0 + try: + cyfunc = _helper_input_void_ptr(func, &cyfuncHelper) + with nogil: + err = cyruntime.cudaOccupancyAvailableDynamicSMemPerBlock(&dynamicSmemSize, cyfunc, numBlocks, blockSize) + finally: + _helper_input_void_ptr_free(&cyfuncHelper) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, dynamicSmemSize) @@ -25658,12 +25757,15 @@ def cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(func, int blockSize, -------- :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessor`, cudaOccupancyMaxPotentialBlockSize (C++ API), cudaOccupancyMaxPotentialBlockSizeWithFlags (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMem (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags (C++ API), cudaOccupancyAvailableDynamicSMemPerBlock (C++ API), :py:obj:`~.cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags` """ - cdef int numBlocks = 0 cdef _HelperInputVoidPtrStruct cyfuncHelper - cdef void* cyfunc = _helper_input_void_ptr(func, &cyfuncHelper) - with nogil: - err = cyruntime.cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(&numBlocks, cyfunc, blockSize, dynamicSMemSize, flags) - _helper_input_void_ptr_free(&cyfuncHelper) + cdef void* cyfunc + cdef int numBlocks = 0 + try: + cyfunc = _helper_input_void_ptr(func, &cyfuncHelper) + with nogil: + err = cyruntime.cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(&numBlocks, cyfunc, blockSize, dynamicSMemSize, flags) + finally: + _helper_input_void_ptr_free(&cyfuncHelper) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, numBlocks) @@ -25937,8 +26039,8 @@ def cudaMallocPitch(size_t width, size_t height): -------- :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemAllocPitch` """ - cdef void_ptr devPtr = 0 cdef size_t pitch = 0 + cdef void_ptr devPtr = 0 with nogil: err = cyruntime.cudaMallocPitch(&devPtr, &pitch, width, height) if err != cyruntime.cudaSuccess: @@ -26014,8 +26116,10 @@ def cudaMallocArray(desc : Optional[cudaChannelFormatDesc], size_t width, size_t -------- :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuArrayCreate` """ - cdef cudaArray_t array = cudaArray_t() - cdef cyruntime.cudaChannelFormatDesc* cydesc_ptr = desc._pvt_ptr if desc is not None else NULL + cdef cyruntime.cudaChannelFormatDesc* cydesc_ptr + cdef cudaArray_t array + array = cudaArray_t() + cydesc_ptr = desc._pvt_ptr if desc is not None else NULL with nogil: err = cyruntime.cudaMallocArray(array._pvt_ptr, cydesc_ptr, width, height, flags) if err != cyruntime.cudaSuccess: @@ -26067,10 +26171,13 @@ def cudaFree(devPtr): :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaMallocManaged`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaMallocFromPoolAsync` :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaFreeAsync` :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemFree` """ cdef _HelperInputVoidPtrStruct cydevPtrHelper - cdef void* cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) - with nogil: - err = cyruntime.cudaFree(cydevPtr) - _helper_input_void_ptr_free(&cydevPtrHelper) + cdef void* cydevPtr + try: + cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) + with nogil: + err = cyruntime.cudaFree(cydevPtr) + finally: + _helper_input_void_ptr_free(&cydevPtrHelper) return (_cudaError_t(err),) {{endif}} @@ -26099,10 +26206,13 @@ def cudaFreeHost(ptr): :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemFreeHost` """ cdef _HelperInputVoidPtrStruct cyptrHelper - cdef void* cyptr = _helper_input_void_ptr(ptr, &cyptrHelper) - with nogil: - err = cyruntime.cudaFreeHost(cyptr) - _helper_input_void_ptr_free(&cyptrHelper) + cdef void* cyptr + try: + cyptr = _helper_input_void_ptr(ptr, &cyptrHelper) + with nogil: + err = cyruntime.cudaFreeHost(cyptr) + finally: + _helper_input_void_ptr_free(&cyptrHelper) return (_cudaError_t(err),) {{endif}} @@ -26376,10 +26486,13 @@ def cudaHostRegister(ptr, size_t size, unsigned int flags): :py:obj:`~.cudaHostUnregister`, :py:obj:`~.cudaHostGetFlags`, :py:obj:`~.cudaHostGetDevicePointer`, :py:obj:`~.cuMemHostRegister` """ cdef _HelperInputVoidPtrStruct cyptrHelper - cdef void* cyptr = _helper_input_void_ptr(ptr, &cyptrHelper) - with nogil: - err = cyruntime.cudaHostRegister(cyptr, size, flags) - _helper_input_void_ptr_free(&cyptrHelper) + cdef void* cyptr + try: + cyptr = _helper_input_void_ptr(ptr, &cyptrHelper) + with nogil: + err = cyruntime.cudaHostRegister(cyptr, size, flags) + finally: + _helper_input_void_ptr_free(&cyptrHelper) return (_cudaError_t(err),) {{endif}} @@ -26410,10 +26523,13 @@ def cudaHostUnregister(ptr): :py:obj:`~.cudaHostUnregister`, :py:obj:`~.cuMemHostUnregister` """ cdef _HelperInputVoidPtrStruct cyptrHelper - cdef void* cyptr = _helper_input_void_ptr(ptr, &cyptrHelper) - with nogil: - err = cyruntime.cudaHostUnregister(cyptr) - _helper_input_void_ptr_free(&cyptrHelper) + cdef void* cyptr + try: + cyptr = _helper_input_void_ptr(ptr, &cyptrHelper) + with nogil: + err = cyruntime.cudaHostUnregister(cyptr) + finally: + _helper_input_void_ptr_free(&cyptrHelper) return (_cudaError_t(err),) {{endif}} @@ -26469,12 +26585,15 @@ def cudaHostGetDevicePointer(pHost, unsigned int flags): -------- :py:obj:`~.cudaSetDeviceFlags`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer` """ - cdef void_ptr pDevice = 0 cdef _HelperInputVoidPtrStruct cypHostHelper - cdef void* cypHost = _helper_input_void_ptr(pHost, &cypHostHelper) - with nogil: - err = cyruntime.cudaHostGetDevicePointer(&pDevice, cypHost, flags) - _helper_input_void_ptr_free(&cypHostHelper) + cdef void* cypHost + cdef void_ptr pDevice = 0 + try: + cypHost = _helper_input_void_ptr(pHost, &cypHostHelper) + with nogil: + err = cyruntime.cudaHostGetDevicePointer(&pDevice, cypHost, flags) + finally: + _helper_input_void_ptr_free(&cypHostHelper) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, pDevice) @@ -26505,12 +26624,15 @@ def cudaHostGetFlags(pHost): -------- :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemHostGetFlags` """ - cdef unsigned int pFlags = 0 cdef _HelperInputVoidPtrStruct cypHostHelper - cdef void* cypHost = _helper_input_void_ptr(pHost, &cypHostHelper) - with nogil: - err = cyruntime.cudaHostGetFlags(&pFlags, cypHost) - _helper_input_void_ptr_free(&cypHostHelper) + cdef void* cypHost + cdef unsigned int pFlags = 0 + try: + cypHost = _helper_input_void_ptr(pHost, &cypHostHelper) + with nogil: + err = cyruntime.cudaHostGetFlags(&pFlags, cypHost) + finally: + _helper_input_void_ptr_free(&cypHostHelper) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, pFlags) @@ -26557,7 +26679,8 @@ def cudaMalloc3D(extent not None : cudaExtent): -------- :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaMemcpy3D`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.make_cudaPitchedPtr`, :py:obj:`~.make_cudaExtent`, :py:obj:`~.cuMemAllocPitch` """ - cdef cudaPitchedPtr pitchedDevPtr = cudaPitchedPtr() + cdef cudaPitchedPtr pitchedDevPtr + pitchedDevPtr = cudaPitchedPtr() with nogil: err = cyruntime.cudaMalloc3D(pitchedDevPtr._pvt_ptr, extent._pvt_ptr[0]) if err != cyruntime.cudaSuccess: @@ -26679,8 +26802,10 @@ def cudaMalloc3DArray(desc : Optional[cudaChannelFormatDesc], extent not None : -------- :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.make_cudaExtent`, :py:obj:`~.cuArray3DCreate` """ - cdef cudaArray_t array = cudaArray_t() - cdef cyruntime.cudaChannelFormatDesc* cydesc_ptr = desc._pvt_ptr if desc is not None else NULL + cdef cyruntime.cudaChannelFormatDesc* cydesc_ptr + cdef cudaArray_t array + array = cudaArray_t() + cydesc_ptr = desc._pvt_ptr if desc is not None else NULL with nogil: err = cyruntime.cudaMalloc3DArray(array._pvt_ptr, cydesc_ptr, extent._pvt_ptr[0], flags) if err != cyruntime.cudaSuccess: @@ -26805,8 +26930,10 @@ def cudaMallocMipmappedArray(desc : Optional[cudaChannelFormatDesc], extent not -------- :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.make_cudaExtent`, :py:obj:`~.cuMipmappedArrayCreate` """ - cdef cudaMipmappedArray_t mipmappedArray = cudaMipmappedArray_t() - cdef cyruntime.cudaChannelFormatDesc* cydesc_ptr = desc._pvt_ptr if desc is not None else NULL + cdef cyruntime.cudaChannelFormatDesc* cydesc_ptr + cdef cudaMipmappedArray_t mipmappedArray + mipmappedArray = cudaMipmappedArray_t() + cydesc_ptr = desc._pvt_ptr if desc is not None else NULL with nogil: err = cyruntime.cudaMallocMipmappedArray(mipmappedArray._pvt_ptr, cydesc_ptr, extent._pvt_ptr[0], numLevels, flags) if err != cyruntime.cudaSuccess: @@ -26848,6 +26975,8 @@ def cudaGetMipmappedArrayLevel(mipmappedArray, unsigned int level): :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.make_cudaExtent`, :py:obj:`~.cuMipmappedArrayGetLevel` """ cdef cyruntime.cudaMipmappedArray_const_t cymipmappedArray + cdef cudaArray_t levelArray + levelArray = cudaArray_t() if mipmappedArray is None: pmipmappedArray = 0 elif isinstance(mipmappedArray, (cudaMipmappedArray_const_t,)): @@ -26855,7 +26984,6 @@ def cudaGetMipmappedArrayLevel(mipmappedArray, unsigned int level): else: pmipmappedArray = int(cudaMipmappedArray_const_t(mipmappedArray)) cymipmappedArray = pmipmappedArray - cdef cudaArray_t levelArray = cudaArray_t() with nogil: err = cyruntime.cudaGetMipmappedArrayLevel(levelArray._pvt_ptr, cymipmappedArray, level) if err != cyruntime.cudaSuccess: @@ -26941,7 +27069,8 @@ def cudaMemcpy3D(p : Optional[cudaMemcpy3DParms]): -------- :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMemcpy3DAsync`, :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.make_cudaExtent`, :py:obj:`~.make_cudaPos`, :py:obj:`~.cuMemcpy3D` """ - cdef cyruntime.cudaMemcpy3DParms* cyp_ptr = p._pvt_ptr if p is not None else NULL + cdef cyruntime.cudaMemcpy3DParms* cyp_ptr + cyp_ptr = p._pvt_ptr if p is not None else NULL with nogil: err = cyruntime.cudaMemcpy3D(cyp_ptr) return (_cudaError_t(err),) @@ -26978,7 +27107,8 @@ def cudaMemcpy3DPeer(p : Optional[cudaMemcpy3DPeerParms]): -------- :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyPeer`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpyPeerAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cuMemcpy3DPeer` """ - cdef cyruntime.cudaMemcpy3DPeerParms* cyp_ptr = p._pvt_ptr if p is not None else NULL + cdef cyruntime.cudaMemcpy3DPeerParms* cyp_ptr + cyp_ptr = p._pvt_ptr if p is not None else NULL with nogil: err = cyruntime.cudaMemcpy3DPeer(cyp_ptr) return (_cudaError_t(err),) @@ -27076,6 +27206,8 @@ def cudaMemcpy3DAsync(p : Optional[cudaMemcpy3DParms], stream): :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMemcpy3D`, :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, ::::py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.make_cudaExtent`, :py:obj:`~.make_cudaPos`, :py:obj:`~.cuMemcpy3DAsync` """ cdef cyruntime.cudaStream_t cystream + cdef cyruntime.cudaMemcpy3DParms* cyp_ptr + cyp_ptr = p._pvt_ptr if p is not None else NULL if stream is None: pstream = 0 elif isinstance(stream, (cudaStream_t,driver.CUstream)): @@ -27083,7 +27215,6 @@ def cudaMemcpy3DAsync(p : Optional[cudaMemcpy3DParms], stream): else: pstream = int(cudaStream_t(stream)) cystream = pstream - cdef cyruntime.cudaMemcpy3DParms* cyp_ptr = p._pvt_ptr if p is not None else NULL with nogil: err = cyruntime.cudaMemcpy3DAsync(cyp_ptr, cystream) return (_cudaError_t(err),) @@ -27116,6 +27247,8 @@ def cudaMemcpy3DPeerAsync(p : Optional[cudaMemcpy3DPeerParms], stream): :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyPeer`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpyPeerAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cuMemcpy3DPeerAsync` """ cdef cyruntime.cudaStream_t cystream + cdef cyruntime.cudaMemcpy3DPeerParms* cyp_ptr + cyp_ptr = p._pvt_ptr if p is not None else NULL if stream is None: pstream = 0 elif isinstance(stream, (cudaStream_t,driver.CUstream)): @@ -27123,7 +27256,6 @@ def cudaMemcpy3DPeerAsync(p : Optional[cudaMemcpy3DPeerParms], stream): else: pstream = int(cudaStream_t(stream)) cystream = pstream - cdef cyruntime.cudaMemcpy3DPeerParms* cyp_ptr = p._pvt_ptr if p is not None else NULL with nogil: err = cyruntime.cudaMemcpy3DPeerAsync(cyp_ptr, cystream) return (_cudaError_t(err),) @@ -27165,8 +27297,8 @@ def cudaMemGetInfo(): -------- :py:obj:`~.cuMemGetInfo` """ - cdef size_t free = 0 cdef size_t total = 0 + cdef size_t free = 0 with nogil: err = cyruntime.cudaMemGetInfo(&free, &total) if err != cyruntime.cudaSuccess: @@ -27206,6 +27338,11 @@ def cudaArrayGetInfo(array): :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuArray3DGetDescriptor` """ cdef cyruntime.cudaArray_t cyarray + cdef unsigned int flags = 0 + cdef cudaExtent extent + cdef cudaChannelFormatDesc desc + desc = cudaChannelFormatDesc() + extent = cudaExtent() if array is None: parray = 0 elif isinstance(array, (cudaArray_t,)): @@ -27213,9 +27350,6 @@ def cudaArrayGetInfo(array): else: parray = int(cudaArray_t(array)) cyarray = parray - cdef cudaChannelFormatDesc desc = cudaChannelFormatDesc() - cdef cudaExtent extent = cudaExtent() - cdef unsigned int flags = 0 with nogil: err = cyruntime.cudaArrayGetInfo(desc._pvt_ptr, extent._pvt_ptr, &flags, cyarray) if err != cyruntime.cudaSuccess: @@ -27264,6 +27398,8 @@ def cudaArrayGetPlane(hArray, unsigned int planeIdx): :py:obj:`~.cuArrayGetPlane` """ cdef cyruntime.cudaArray_t cyhArray + cdef cudaArray_t pPlaneArray + pPlaneArray = cudaArray_t() if hArray is None: phArray = 0 elif isinstance(hArray, (cudaArray_t,)): @@ -27271,7 +27407,6 @@ def cudaArrayGetPlane(hArray, unsigned int planeIdx): else: phArray = int(cudaArray_t(hArray)) cyhArray = phArray - cdef cudaArray_t pPlaneArray = cudaArray_t() with nogil: err = cyruntime.cudaArrayGetPlane(pPlaneArray._pvt_ptr, cyhArray, planeIdx) if err != cyruntime.cudaSuccess: @@ -27314,6 +27449,8 @@ def cudaArrayGetMemoryRequirements(array, int device): :py:obj:`~.cudaMipmappedArrayGetMemoryRequirements` """ cdef cyruntime.cudaArray_t cyarray + cdef cudaArrayMemoryRequirements memoryRequirements + memoryRequirements = cudaArrayMemoryRequirements() if array is None: parray = 0 elif isinstance(array, (cudaArray_t,)): @@ -27321,7 +27458,6 @@ def cudaArrayGetMemoryRequirements(array, int device): else: parray = int(cudaArray_t(array)) cyarray = parray - cdef cudaArrayMemoryRequirements memoryRequirements = cudaArrayMemoryRequirements() with nogil: err = cyruntime.cudaArrayGetMemoryRequirements(memoryRequirements._pvt_ptr, cyarray, device) if err != cyruntime.cudaSuccess: @@ -27364,6 +27500,8 @@ def cudaMipmappedArrayGetMemoryRequirements(mipmap, int device): :py:obj:`~.cudaArrayGetMemoryRequirements` """ cdef cyruntime.cudaMipmappedArray_t cymipmap + cdef cudaArrayMemoryRequirements memoryRequirements + memoryRequirements = cudaArrayMemoryRequirements() if mipmap is None: pmipmap = 0 elif isinstance(mipmap, (cudaMipmappedArray_t,)): @@ -27371,7 +27509,6 @@ def cudaMipmappedArrayGetMemoryRequirements(mipmap, int device): else: pmipmap = int(cudaMipmappedArray_t(mipmap)) cymipmap = pmipmap - cdef cudaArrayMemoryRequirements memoryRequirements = cudaArrayMemoryRequirements() with nogil: err = cyruntime.cudaMipmappedArrayGetMemoryRequirements(memoryRequirements._pvt_ptr, cymipmap, device) if err != cyruntime.cudaSuccess: @@ -27420,6 +27557,8 @@ def cudaArrayGetSparseProperties(array): :py:obj:`~.cudaMipmappedArrayGetSparseProperties`, :py:obj:`~.cuMemMapArrayAsync` """ cdef cyruntime.cudaArray_t cyarray + cdef cudaArraySparseProperties sparseProperties + sparseProperties = cudaArraySparseProperties() if array is None: parray = 0 elif isinstance(array, (cudaArray_t,)): @@ -27427,7 +27566,6 @@ def cudaArrayGetSparseProperties(array): else: parray = int(cudaArray_t(array)) cyarray = parray - cdef cudaArraySparseProperties sparseProperties = cudaArraySparseProperties() with nogil: err = cyruntime.cudaArrayGetSparseProperties(sparseProperties._pvt_ptr, cyarray) if err != cyruntime.cudaSuccess: @@ -27476,6 +27614,8 @@ def cudaMipmappedArrayGetSparseProperties(mipmap): :py:obj:`~.cudaArrayGetSparseProperties`, :py:obj:`~.cuMemMapArrayAsync` """ cdef cyruntime.cudaMipmappedArray_t cymipmap + cdef cudaArraySparseProperties sparseProperties + sparseProperties = cudaArraySparseProperties() if mipmap is None: pmipmap = 0 elif isinstance(mipmap, (cudaMipmappedArray_t,)): @@ -27483,7 +27623,6 @@ def cudaMipmappedArrayGetSparseProperties(mipmap): else: pmipmap = int(cudaMipmappedArray_t(mipmap)) cymipmap = pmipmap - cdef cudaArraySparseProperties sparseProperties = cudaArraySparseProperties() with nogil: err = cyruntime.cudaMipmappedArrayGetSparseProperties(sparseProperties._pvt_ptr, cymipmap) if err != cyruntime.cudaSuccess: @@ -27531,15 +27670,20 @@ def cudaMemcpy(dst, src, size_t count, kind not None : cudaMemcpyKind): -------- :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpy` """ - cdef _HelperInputVoidPtrStruct cydstHelper - cdef void* cydst = _helper_input_void_ptr(dst, &cydstHelper) + cdef cyruntime.cudaMemcpyKind cykind cdef _HelperInputVoidPtrStruct cysrcHelper - cdef void* cysrc = _helper_input_void_ptr(src, &cysrcHelper) - cdef cyruntime.cudaMemcpyKind cykind = int(kind) - with nogil: - err = cyruntime.cudaMemcpy(cydst, cysrc, count, cykind) - _helper_input_void_ptr_free(&cydstHelper) - _helper_input_void_ptr_free(&cysrcHelper) + cdef void* cysrc + cdef _HelperInputVoidPtrStruct cydstHelper + cdef void* cydst + try: + cydst = _helper_input_void_ptr(dst, &cydstHelper) + cysrc = _helper_input_void_ptr(src, &cysrcHelper) + cykind = int(kind) + with nogil: + err = cyruntime.cudaMemcpy(cydst, cysrc, count, cykind) + finally: + _helper_input_void_ptr_free(&cydstHelper) + _helper_input_void_ptr_free(&cysrcHelper) return (_cudaError_t(err),) {{endif}} @@ -27582,14 +27726,18 @@ def cudaMemcpyPeer(dst, int dstDevice, src, int srcDevice, size_t count): -------- :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpyPeerAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cuMemcpyPeer` """ - cdef _HelperInputVoidPtrStruct cydstHelper - cdef void* cydst = _helper_input_void_ptr(dst, &cydstHelper) cdef _HelperInputVoidPtrStruct cysrcHelper - cdef void* cysrc = _helper_input_void_ptr(src, &cysrcHelper) - with nogil: - err = cyruntime.cudaMemcpyPeer(cydst, dstDevice, cysrc, srcDevice, count) - _helper_input_void_ptr_free(&cydstHelper) - _helper_input_void_ptr_free(&cysrcHelper) + cdef void* cysrc + cdef _HelperInputVoidPtrStruct cydstHelper + cdef void* cydst + try: + cydst = _helper_input_void_ptr(dst, &cydstHelper) + cysrc = _helper_input_void_ptr(src, &cysrcHelper) + with nogil: + err = cyruntime.cudaMemcpyPeer(cydst, dstDevice, cysrc, srcDevice, count) + finally: + _helper_input_void_ptr_free(&cydstHelper) + _helper_input_void_ptr_free(&cysrcHelper) return (_cudaError_t(err),) {{endif}} @@ -27643,15 +27791,20 @@ def cudaMemcpy2D(dst, size_t dpitch, src, size_t spitch, size_t width, size_t he -------- :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DUnaligned` """ - cdef _HelperInputVoidPtrStruct cydstHelper - cdef void* cydst = _helper_input_void_ptr(dst, &cydstHelper) + cdef cyruntime.cudaMemcpyKind cykind cdef _HelperInputVoidPtrStruct cysrcHelper - cdef void* cysrc = _helper_input_void_ptr(src, &cysrcHelper) - cdef cyruntime.cudaMemcpyKind cykind = int(kind) - with nogil: - err = cyruntime.cudaMemcpy2D(cydst, dpitch, cysrc, spitch, width, height, cykind) - _helper_input_void_ptr_free(&cydstHelper) - _helper_input_void_ptr_free(&cysrcHelper) + cdef void* cysrc + cdef _HelperInputVoidPtrStruct cydstHelper + cdef void* cydst + try: + cydst = _helper_input_void_ptr(dst, &cydstHelper) + cysrc = _helper_input_void_ptr(src, &cysrcHelper) + cykind = int(kind) + with nogil: + err = cyruntime.cudaMemcpy2D(cydst, dpitch, cysrc, spitch, width, height, cykind) + finally: + _helper_input_void_ptr_free(&cydstHelper) + _helper_input_void_ptr_free(&cysrcHelper) return (_cudaError_t(err),) {{endif}} @@ -27706,20 +27859,24 @@ def cudaMemcpy2DToArray(dst, size_t wOffset, size_t hOffset, src, size_t spitch, -------- :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DUnaligned` """ - cdef cyruntime.cudaArray_t cydst - if dst is None: - pdst = 0 - elif isinstance(dst, (cudaArray_t,)): - pdst = int(dst) - else: - pdst = int(cudaArray_t(dst)) - cydst = pdst + cdef cyruntime.cudaMemcpyKind cykind cdef _HelperInputVoidPtrStruct cysrcHelper - cdef void* cysrc = _helper_input_void_ptr(src, &cysrcHelper) - cdef cyruntime.cudaMemcpyKind cykind = int(kind) - with nogil: - err = cyruntime.cudaMemcpy2DToArray(cydst, wOffset, hOffset, cysrc, spitch, width, height, cykind) - _helper_input_void_ptr_free(&cysrcHelper) + cdef void* cysrc + cdef cyruntime.cudaArray_t cydst + try: + if dst is None: + pdst = 0 + elif isinstance(dst, (cudaArray_t,)): + pdst = int(dst) + else: + pdst = int(cudaArray_t(dst)) + cydst = pdst + cysrc = _helper_input_void_ptr(src, &cysrcHelper) + cykind = int(kind) + with nogil: + err = cyruntime.cudaMemcpy2DToArray(cydst, wOffset, hOffset, cysrc, spitch, width, height, cykind) + finally: + _helper_input_void_ptr_free(&cysrcHelper) return (_cudaError_t(err),) {{endif}} @@ -27774,20 +27931,24 @@ def cudaMemcpy2DFromArray(dst, size_t dpitch, src, size_t wOffset, size_t hOffse -------- :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DUnaligned` """ + cdef cyruntime.cudaMemcpyKind cykind cdef cyruntime.cudaArray_const_t cysrc - if src is None: - psrc = 0 - elif isinstance(src, (cudaArray_const_t,)): - psrc = int(src) - else: - psrc = int(cudaArray_const_t(src)) - cysrc = psrc cdef _HelperInputVoidPtrStruct cydstHelper - cdef void* cydst = _helper_input_void_ptr(dst, &cydstHelper) - cdef cyruntime.cudaMemcpyKind cykind = int(kind) - with nogil: - err = cyruntime.cudaMemcpy2DFromArray(cydst, dpitch, cysrc, wOffset, hOffset, width, height, cykind) - _helper_input_void_ptr_free(&cydstHelper) + cdef void* cydst + try: + cydst = _helper_input_void_ptr(dst, &cydstHelper) + if src is None: + psrc = 0 + elif isinstance(src, (cudaArray_const_t,)): + psrc = int(src) + else: + psrc = int(cudaArray_const_t(src)) + cysrc = psrc + cykind = int(kind) + with nogil: + err = cyruntime.cudaMemcpy2DFromArray(cydst, dpitch, cysrc, wOffset, hOffset, width, height, cykind) + finally: + _helper_input_void_ptr_free(&cydstHelper) return (_cudaError_t(err),) {{endif}} @@ -27842,14 +28003,8 @@ def cudaMemcpy2DArrayToArray(dst, size_t wOffsetDst, size_t hOffsetDst, src, siz -------- :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DUnaligned` """ + cdef cyruntime.cudaMemcpyKind cykind cdef cyruntime.cudaArray_const_t cysrc - if src is None: - psrc = 0 - elif isinstance(src, (cudaArray_const_t,)): - psrc = int(src) - else: - psrc = int(cudaArray_const_t(src)) - cysrc = psrc cdef cyruntime.cudaArray_t cydst if dst is None: pdst = 0 @@ -27858,7 +28013,14 @@ def cudaMemcpy2DArrayToArray(dst, size_t wOffsetDst, size_t hOffsetDst, src, siz else: pdst = int(cudaArray_t(dst)) cydst = pdst - cdef cyruntime.cudaMemcpyKind cykind = int(kind) + if src is None: + psrc = 0 + elif isinstance(src, (cudaArray_const_t,)): + psrc = int(src) + else: + psrc = int(cudaArray_const_t(src)) + cysrc = psrc + cykind = int(kind) with nogil: err = cyruntime.cudaMemcpy2DArrayToArray(cydst, wOffsetDst, hOffsetDst, cysrc, wOffsetSrc, hOffsetSrc, width, height, cykind) return (_cudaError_t(err),) @@ -27917,22 +28079,27 @@ def cudaMemcpyAsync(dst, src, size_t count, kind not None : cudaMemcpyKind, stre :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyAsync`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemcpyDtoDAsync` """ cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream - cdef _HelperInputVoidPtrStruct cydstHelper - cdef void* cydst = _helper_input_void_ptr(dst, &cydstHelper) + cdef cyruntime.cudaMemcpyKind cykind cdef _HelperInputVoidPtrStruct cysrcHelper - cdef void* cysrc = _helper_input_void_ptr(src, &cysrcHelper) - cdef cyruntime.cudaMemcpyKind cykind = int(kind) - with nogil: - err = cyruntime.cudaMemcpyAsync(cydst, cysrc, count, cykind, cystream) - _helper_input_void_ptr_free(&cydstHelper) - _helper_input_void_ptr_free(&cysrcHelper) + cdef void* cysrc + cdef _HelperInputVoidPtrStruct cydstHelper + cdef void* cydst + try: + cydst = _helper_input_void_ptr(dst, &cydstHelper) + cysrc = _helper_input_void_ptr(src, &cysrcHelper) + cykind = int(kind) + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream + with nogil: + err = cyruntime.cudaMemcpyAsync(cydst, cysrc, count, cykind, cystream) + finally: + _helper_input_void_ptr_free(&cydstHelper) + _helper_input_void_ptr_free(&cysrcHelper) return (_cudaError_t(err),) {{endif}} @@ -27976,21 +28143,25 @@ def cudaMemcpyPeerAsync(dst, int dstDevice, src, int srcDevice, size_t count, st :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyPeer`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cuMemcpyPeerAsync` """ cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream - cdef _HelperInputVoidPtrStruct cydstHelper - cdef void* cydst = _helper_input_void_ptr(dst, &cydstHelper) cdef _HelperInputVoidPtrStruct cysrcHelper - cdef void* cysrc = _helper_input_void_ptr(src, &cysrcHelper) - with nogil: - err = cyruntime.cudaMemcpyPeerAsync(cydst, dstDevice, cysrc, srcDevice, count, cystream) - _helper_input_void_ptr_free(&cydstHelper) - _helper_input_void_ptr_free(&cysrcHelper) + cdef void* cysrc + cdef _HelperInputVoidPtrStruct cydstHelper + cdef void* cydst + try: + cydst = _helper_input_void_ptr(dst, &cydstHelper) + cysrc = _helper_input_void_ptr(src, &cysrcHelper) + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream + with nogil: + err = cyruntime.cudaMemcpyPeerAsync(cydst, dstDevice, cysrc, srcDevice, count, cystream) + finally: + _helper_input_void_ptr_free(&cydstHelper) + _helper_input_void_ptr_free(&cysrcHelper) return (_cudaError_t(err),) {{endif}} @@ -28099,48 +28270,56 @@ def cudaMemcpyBatchAsync(dsts : Optional[tuple[Any] | list[Any]], srcs : Optiona :py:obj:`~.cudaSuccess` :py:obj:`~.cudaErrorInvalidValue` """ cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream - if not all(isinstance(_x, (int)) for _x in attrsIdxs): - raise TypeError("Argument 'attrsIdxs' is not instance of type (expected tuple[int] or list[int]") - attrs = [] if attrs is None else attrs - if not all(isinstance(_x, (cudaMemcpyAttributes,)) for _x in attrs): - raise TypeError("Argument 'attrs' is not instance of type (expected tuple[cyruntime.cudaMemcpyAttributes,] or list[cyruntime.cudaMemcpyAttributes,]") - if not all(isinstance(_x, (int)) for _x in sizes): - raise TypeError("Argument 'sizes' is not instance of type (expected tuple[int] or list[int]") - srcs = [] if srcs is None else srcs - dsts = [] if dsts is None else dsts - pylist = [_HelperInputVoidPtr(pydsts) for pydsts in dsts] - cdef _InputVoidPtrPtrHelper voidStarHelperdsts = _InputVoidPtrPtrHelper(pylist) - cdef const void** cydsts_ptr = voidStarHelperdsts.cptr - pylist = [_HelperInputVoidPtr(pysrcs) for pysrcs in srcs] - cdef _InputVoidPtrPtrHelper voidStarHelpersrcs = _InputVoidPtrPtrHelper(pylist) - cdef const void** cysrcs_ptr = voidStarHelpersrcs.cptr - cdef vector[size_t] cysizes = sizes - if count > len(dsts): raise RuntimeError("List is too small: " + str(len(dsts)) + " < " + str(count)) - if count > len(srcs): raise RuntimeError("List is too small: " + str(len(srcs)) + " < " + str(count)) - if count > len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count)) + cdef vector[size_t] cyattrsIdxs cdef cyruntime.cudaMemcpyAttributes* cyattrs = NULL - if len(attrs) > 1: - cyattrs = calloc(len(attrs), sizeof(cyruntime.cudaMemcpyAttributes)) - if cyattrs is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(attrs)) + 'x' + str(sizeof(cyruntime.cudaMemcpyAttributes))) - for idx in range(len(attrs)): - string.memcpy(&cyattrs[idx], (attrs[idx])._pvt_ptr, sizeof(cyruntime.cudaMemcpyAttributes)) - elif len(attrs) == 1: - cyattrs = (attrs[0])._pvt_ptr - cdef vector[size_t] cyattrsIdxs = attrsIdxs - if numAttrs > len(attrs): raise RuntimeError("List is too small: " + str(len(attrs)) + " < " + str(numAttrs)) - if numAttrs > len(attrsIdxs): raise RuntimeError("List is too small: " + str(len(attrsIdxs)) + " < " + str(numAttrs)) - with nogil: - err = cyruntime.cudaMemcpyBatchAsync(cydsts_ptr, cysrcs_ptr, cysizes.data(), count, cyattrs, cyattrsIdxs.data(), numAttrs, cystream) - if len(attrs) > 1 and cyattrs is not NULL: - free(cyattrs) + cdef vector[size_t] cysizes + cdef _InputVoidPtrPtrHelper voidStarHelpersrcs + cdef const void** cysrcs_ptr + cdef _InputVoidPtrPtrHelper voidStarHelperdsts + cdef const void** cydsts_ptr + try: + dsts = [] if dsts is None else dsts + pylist = [_HelperInputVoidPtr(pydsts) for pydsts in dsts] + voidStarHelperdsts = _InputVoidPtrPtrHelper(pylist) + cydsts_ptr = voidStarHelperdsts.cptr + srcs = [] if srcs is None else srcs + pylist = [_HelperInputVoidPtr(pysrcs) for pysrcs in srcs] + voidStarHelpersrcs = _InputVoidPtrPtrHelper(pylist) + cysrcs_ptr = voidStarHelpersrcs.cptr + if not all(isinstance(_x, (int)) for _x in sizes): + raise TypeError("Argument 'sizes' is not instance of type (expected tuple[int] or list[int]") + cysizes = sizes + if count > len(dsts): raise RuntimeError("List is too small: " + str(len(dsts)) + " < " + str(count)) + if count > len(srcs): raise RuntimeError("List is too small: " + str(len(srcs)) + " < " + str(count)) + if count > len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count)) + attrs = [] if attrs is None else attrs + if not all(isinstance(_x, (cudaMemcpyAttributes,)) for _x in attrs): + raise TypeError("Argument 'attrs' is not instance of type (expected tuple[cyruntime.cudaMemcpyAttributes,] or list[cyruntime.cudaMemcpyAttributes,]") + if len(attrs) > 1: + cyattrs = calloc(len(attrs), sizeof(cyruntime.cudaMemcpyAttributes)) + if cyattrs is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(attrs)) + 'x' + str(sizeof(cyruntime.cudaMemcpyAttributes))) + for idx in range(len(attrs)): + string.memcpy(&cyattrs[idx], (attrs[idx])._pvt_ptr, sizeof(cyruntime.cudaMemcpyAttributes)) + elif len(attrs) == 1: + cyattrs = (attrs[0])._pvt_ptr + if not all(isinstance(_x, (int)) for _x in attrsIdxs): + raise TypeError("Argument 'attrsIdxs' is not instance of type (expected tuple[int] or list[int]") + cyattrsIdxs = attrsIdxs + if numAttrs > len(attrs): raise RuntimeError("List is too small: " + str(len(attrs)) + " < " + str(numAttrs)) + if numAttrs > len(attrsIdxs): raise RuntimeError("List is too small: " + str(len(attrsIdxs)) + " < " + str(numAttrs)) + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream + with nogil: + err = cyruntime.cudaMemcpyBatchAsync(cydsts_ptr, cysrcs_ptr, cysizes.data(), count, cyattrs, cyattrsIdxs.data(), numAttrs, cystream) + finally: + if len(attrs) > 1 and cyattrs is not NULL: + free(cyattrs) return (_cudaError_t(err),) {{endif}} @@ -28247,30 +28426,32 @@ def cudaMemcpy3DBatchAsync(size_t numOps, opList : Optional[tuple[cudaMemcpy3DBa :py:obj:`~.cudaSuccess` :py:obj:`~.cudaErrorInvalidValue` """ cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream - opList = [] if opList is None else opList - if not all(isinstance(_x, (cudaMemcpy3DBatchOp,)) for _x in opList): - raise TypeError("Argument 'opList' is not instance of type (expected tuple[cyruntime.cudaMemcpy3DBatchOp,] or list[cyruntime.cudaMemcpy3DBatchOp,]") - if numOps > len(opList): raise RuntimeError("List is too small: " + str(len(opList)) + " < " + str(numOps)) cdef cyruntime.cudaMemcpy3DBatchOp* cyopList = NULL - if len(opList) > 1: - cyopList = calloc(len(opList), sizeof(cyruntime.cudaMemcpy3DBatchOp)) - if cyopList is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(opList)) + 'x' + str(sizeof(cyruntime.cudaMemcpy3DBatchOp))) - for idx in range(len(opList)): - string.memcpy(&cyopList[idx], (opList[idx])._pvt_ptr, sizeof(cyruntime.cudaMemcpy3DBatchOp)) - elif len(opList) == 1: - cyopList = (opList[0])._pvt_ptr - with nogil: - err = cyruntime.cudaMemcpy3DBatchAsync(numOps, cyopList, flags, cystream) - if len(opList) > 1 and cyopList is not NULL: - free(cyopList) + try: + if numOps > len(opList): raise RuntimeError("List is too small: " + str(len(opList)) + " < " + str(numOps)) + opList = [] if opList is None else opList + if not all(isinstance(_x, (cudaMemcpy3DBatchOp,)) for _x in opList): + raise TypeError("Argument 'opList' is not instance of type (expected tuple[cyruntime.cudaMemcpy3DBatchOp,] or list[cyruntime.cudaMemcpy3DBatchOp,]") + if len(opList) > 1: + cyopList = calloc(len(opList), sizeof(cyruntime.cudaMemcpy3DBatchOp)) + if cyopList is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(opList)) + 'x' + str(sizeof(cyruntime.cudaMemcpy3DBatchOp))) + for idx in range(len(opList)): + string.memcpy(&cyopList[idx], (opList[idx])._pvt_ptr, sizeof(cyruntime.cudaMemcpy3DBatchOp)) + elif len(opList) == 1: + cyopList = (opList[0])._pvt_ptr + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream + with nogil: + err = cyruntime.cudaMemcpy3DBatchAsync(numOps, cyopList, flags, cystream) + finally: + if len(opList) > 1 and cyopList is not NULL: + free(cyopList) return (_cudaError_t(err),) {{endif}} @@ -28338,22 +28519,27 @@ def cudaMemcpy2DAsync(dst, size_t dpitch, src, size_t spitch, size_t width, size :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2DAsync` """ cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream - cdef _HelperInputVoidPtrStruct cydstHelper - cdef void* cydst = _helper_input_void_ptr(dst, &cydstHelper) + cdef cyruntime.cudaMemcpyKind cykind cdef _HelperInputVoidPtrStruct cysrcHelper - cdef void* cysrc = _helper_input_void_ptr(src, &cysrcHelper) - cdef cyruntime.cudaMemcpyKind cykind = int(kind) - with nogil: - err = cyruntime.cudaMemcpy2DAsync(cydst, dpitch, cysrc, spitch, width, height, cykind, cystream) - _helper_input_void_ptr_free(&cydstHelper) - _helper_input_void_ptr_free(&cysrcHelper) + cdef void* cysrc + cdef _HelperInputVoidPtrStruct cydstHelper + cdef void* cydst + try: + cydst = _helper_input_void_ptr(dst, &cydstHelper) + cysrc = _helper_input_void_ptr(src, &cysrcHelper) + cykind = int(kind) + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream + with nogil: + err = cyruntime.cudaMemcpy2DAsync(cydst, dpitch, cysrc, spitch, width, height, cykind, cystream) + finally: + _helper_input_void_ptr_free(&cydstHelper) + _helper_input_void_ptr_free(&cysrcHelper) return (_cudaError_t(err),) {{endif}} @@ -28422,27 +28608,31 @@ def cudaMemcpy2DToArrayAsync(dst, size_t wOffset, size_t hOffset, src, size_t sp :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, """ cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream - cdef cyruntime.cudaArray_t cydst - if dst is None: - pdst = 0 - elif isinstance(dst, (cudaArray_t,)): - pdst = int(dst) - else: - pdst = int(cudaArray_t(dst)) - cydst = pdst + cdef cyruntime.cudaMemcpyKind cykind cdef _HelperInputVoidPtrStruct cysrcHelper - cdef void* cysrc = _helper_input_void_ptr(src, &cysrcHelper) - cdef cyruntime.cudaMemcpyKind cykind = int(kind) - with nogil: - err = cyruntime.cudaMemcpy2DToArrayAsync(cydst, wOffset, hOffset, cysrc, spitch, width, height, cykind, cystream) - _helper_input_void_ptr_free(&cysrcHelper) + cdef void* cysrc + cdef cyruntime.cudaArray_t cydst + try: + if dst is None: + pdst = 0 + elif isinstance(dst, (cudaArray_t,)): + pdst = int(dst) + else: + pdst = int(cudaArray_t(dst)) + cydst = pdst + cysrc = _helper_input_void_ptr(src, &cysrcHelper) + cykind = int(kind) + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream + with nogil: + err = cyruntime.cudaMemcpy2DToArrayAsync(cydst, wOffset, hOffset, cysrc, spitch, width, height, cykind, cystream) + finally: + _helper_input_void_ptr_free(&cysrcHelper) return (_cudaError_t(err),) {{endif}} @@ -28510,27 +28700,31 @@ def cudaMemcpy2DFromArrayAsync(dst, size_t dpitch, src, size_t wOffset, size_t h :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, """ cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream + cdef cyruntime.cudaMemcpyKind cykind cdef cyruntime.cudaArray_const_t cysrc - if src is None: - psrc = 0 - elif isinstance(src, (cudaArray_const_t,)): - psrc = int(src) - else: - psrc = int(cudaArray_const_t(src)) - cysrc = psrc cdef _HelperInputVoidPtrStruct cydstHelper - cdef void* cydst = _helper_input_void_ptr(dst, &cydstHelper) - cdef cyruntime.cudaMemcpyKind cykind = int(kind) - with nogil: - err = cyruntime.cudaMemcpy2DFromArrayAsync(cydst, dpitch, cysrc, wOffset, hOffset, width, height, cykind, cystream) - _helper_input_void_ptr_free(&cydstHelper) + cdef void* cydst + try: + cydst = _helper_input_void_ptr(dst, &cydstHelper) + if src is None: + psrc = 0 + elif isinstance(src, (cudaArray_const_t,)): + psrc = int(src) + else: + psrc = int(cudaArray_const_t(src)) + cysrc = psrc + cykind = int(kind) + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream + with nogil: + err = cyruntime.cudaMemcpy2DFromArrayAsync(cydst, dpitch, cysrc, wOffset, hOffset, width, height, cykind, cystream) + finally: + _helper_input_void_ptr_free(&cydstHelper) return (_cudaError_t(err),) {{endif}} @@ -28565,10 +28759,13 @@ def cudaMemset(devPtr, int value, size_t count): :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32` """ cdef _HelperInputVoidPtrStruct cydevPtrHelper - cdef void* cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) - with nogil: - err = cyruntime.cudaMemset(cydevPtr, value, count) - _helper_input_void_ptr_free(&cydevPtrHelper) + cdef void* cydevPtr + try: + cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) + with nogil: + err = cyruntime.cudaMemset(cydevPtr, value, count) + finally: + _helper_input_void_ptr_free(&cydevPtrHelper) return (_cudaError_t(err),) {{endif}} @@ -28610,10 +28807,13 @@ def cudaMemset2D(devPtr, size_t pitch, int value, size_t width, size_t height): :py:obj:`~.cudaMemset`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMemsetAsync`, :py:obj:`~.cudaMemset2DAsync`, :py:obj:`~.cudaMemset3DAsync`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32` """ cdef _HelperInputVoidPtrStruct cydevPtrHelper - cdef void* cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) - with nogil: - err = cyruntime.cudaMemset2D(cydevPtr, pitch, value, width, height) - _helper_input_void_ptr_free(&cydevPtrHelper) + cdef void* cydevPtr + try: + cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) + with nogil: + err = cyruntime.cudaMemset2D(cydevPtr, pitch, value, width, height) + finally: + _helper_input_void_ptr_free(&cydevPtrHelper) return (_cudaError_t(err),) {{endif}} @@ -28710,18 +28910,21 @@ def cudaMemsetAsync(devPtr, int value, size_t count, stream): :py:obj:`~.cudaMemset`, :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMemset2DAsync`, :py:obj:`~.cudaMemset3DAsync`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32Async` """ cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream cdef _HelperInputVoidPtrStruct cydevPtrHelper - cdef void* cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) - with nogil: - err = cyruntime.cudaMemsetAsync(cydevPtr, value, count, cystream) - _helper_input_void_ptr_free(&cydevPtrHelper) + cdef void* cydevPtr + try: + cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream + with nogil: + err = cyruntime.cudaMemsetAsync(cydevPtr, value, count, cystream) + finally: + _helper_input_void_ptr_free(&cydevPtrHelper) return (_cudaError_t(err),) {{endif}} @@ -28771,18 +28974,21 @@ def cudaMemset2DAsync(devPtr, size_t pitch, int value, size_t width, size_t heig :py:obj:`~.cudaMemset`, :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMemsetAsync`, :py:obj:`~.cudaMemset3DAsync`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32Async` """ cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream cdef _HelperInputVoidPtrStruct cydevPtrHelper - cdef void* cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) - with nogil: - err = cyruntime.cudaMemset2DAsync(cydevPtr, pitch, value, width, height, cystream) - _helper_input_void_ptr_free(&cydevPtrHelper) + cdef void* cydevPtr + try: + cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream + with nogil: + err = cyruntime.cudaMemset2DAsync(cydevPtr, pitch, value, width, height, cystream) + finally: + _helper_input_void_ptr_free(&cydevPtrHelper) return (_cudaError_t(err),) {{endif}} @@ -28960,18 +29166,21 @@ def cudaMemPrefetchAsync(devPtr, size_t count, location not None : cudaMemLocati :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyPeer`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cudaMemAdvise`, :py:obj:`~.cuMemPrefetchAsync` """ cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream cdef _HelperInputVoidPtrStruct cydevPtrHelper - cdef void* cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) - with nogil: - err = cyruntime.cudaMemPrefetchAsync(cydevPtr, count, location._pvt_ptr[0], flags, cystream) - _helper_input_void_ptr_free(&cydevPtrHelper) + cdef void* cydevPtr + try: + cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream + with nogil: + err = cyruntime.cudaMemPrefetchAsync(cydevPtr, count, location._pvt_ptr[0], flags, cystream) + finally: + _helper_input_void_ptr_free(&cydevPtrHelper) return (_cudaError_t(err),) {{endif}} @@ -29044,43 +29253,49 @@ def cudaMemPrefetchBatchAsync(dptrs : Optional[tuple[Any] | list[Any]], sizes : """ cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream - if not all(isinstance(_x, (int)) for _x in prefetchLocIdxs): - raise TypeError("Argument 'prefetchLocIdxs' is not instance of type (expected tuple[int] or list[int]") - prefetchLocs = [] if prefetchLocs is None else prefetchLocs - if not all(isinstance(_x, (cudaMemLocation,)) for _x in prefetchLocs): - raise TypeError("Argument 'prefetchLocs' is not instance of type (expected tuple[cyruntime.cudaMemLocation,] or list[cyruntime.cudaMemLocation,]") - if not all(isinstance(_x, (int)) for _x in sizes): - raise TypeError("Argument 'sizes' is not instance of type (expected tuple[int] or list[int]") - dptrs = [] if dptrs is None else dptrs - pylist = [_HelperInputVoidPtr(pydptrs) for pydptrs in dptrs] - cdef _InputVoidPtrPtrHelper voidStarHelperdptrs = _InputVoidPtrPtrHelper(pylist) - cdef void** cydptrs_ptr = voidStarHelperdptrs.cptr - cdef vector[size_t] cysizes = sizes - if count > len(dptrs): raise RuntimeError("List is too small: " + str(len(dptrs)) + " < " + str(count)) - if count > len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count)) + cdef vector[size_t] cyprefetchLocIdxs cdef cyruntime.cudaMemLocation* cyprefetchLocs = NULL - if len(prefetchLocs) > 1: - cyprefetchLocs = calloc(len(prefetchLocs), sizeof(cyruntime.cudaMemLocation)) - if cyprefetchLocs is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(prefetchLocs)) + 'x' + str(sizeof(cyruntime.cudaMemLocation))) - for idx in range(len(prefetchLocs)): - string.memcpy(&cyprefetchLocs[idx], (prefetchLocs[idx])._pvt_ptr, sizeof(cyruntime.cudaMemLocation)) - elif len(prefetchLocs) == 1: - cyprefetchLocs = (prefetchLocs[0])._pvt_ptr - cdef vector[size_t] cyprefetchLocIdxs = prefetchLocIdxs - if numPrefetchLocs > len(prefetchLocs): raise RuntimeError("List is too small: " + str(len(prefetchLocs)) + " < " + str(numPrefetchLocs)) - if numPrefetchLocs > len(prefetchLocIdxs): raise RuntimeError("List is too small: " + str(len(prefetchLocIdxs)) + " < " + str(numPrefetchLocs)) - with nogil: - err = cyruntime.cudaMemPrefetchBatchAsync(cydptrs_ptr, cysizes.data(), count, cyprefetchLocs, cyprefetchLocIdxs.data(), numPrefetchLocs, flags, cystream) - if len(prefetchLocs) > 1 and cyprefetchLocs is not NULL: - free(cyprefetchLocs) + cdef vector[size_t] cysizes + cdef _InputVoidPtrPtrHelper voidStarHelperdptrs + cdef void** cydptrs_ptr + try: + dptrs = [] if dptrs is None else dptrs + pylist = [_HelperInputVoidPtr(pydptrs) for pydptrs in dptrs] + voidStarHelperdptrs = _InputVoidPtrPtrHelper(pylist) + cydptrs_ptr = voidStarHelperdptrs.cptr + if not all(isinstance(_x, (int)) for _x in sizes): + raise TypeError("Argument 'sizes' is not instance of type (expected tuple[int] or list[int]") + cysizes = sizes + if count > len(dptrs): raise RuntimeError("List is too small: " + str(len(dptrs)) + " < " + str(count)) + if count > len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count)) + prefetchLocs = [] if prefetchLocs is None else prefetchLocs + if not all(isinstance(_x, (cudaMemLocation,)) for _x in prefetchLocs): + raise TypeError("Argument 'prefetchLocs' is not instance of type (expected tuple[cyruntime.cudaMemLocation,] or list[cyruntime.cudaMemLocation,]") + if len(prefetchLocs) > 1: + cyprefetchLocs = calloc(len(prefetchLocs), sizeof(cyruntime.cudaMemLocation)) + if cyprefetchLocs is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(prefetchLocs)) + 'x' + str(sizeof(cyruntime.cudaMemLocation))) + for idx in range(len(prefetchLocs)): + string.memcpy(&cyprefetchLocs[idx], (prefetchLocs[idx])._pvt_ptr, sizeof(cyruntime.cudaMemLocation)) + elif len(prefetchLocs) == 1: + cyprefetchLocs = (prefetchLocs[0])._pvt_ptr + if not all(isinstance(_x, (int)) for _x in prefetchLocIdxs): + raise TypeError("Argument 'prefetchLocIdxs' is not instance of type (expected tuple[int] or list[int]") + cyprefetchLocIdxs = prefetchLocIdxs + if numPrefetchLocs > len(prefetchLocs): raise RuntimeError("List is too small: " + str(len(prefetchLocs)) + " < " + str(numPrefetchLocs)) + if numPrefetchLocs > len(prefetchLocIdxs): raise RuntimeError("List is too small: " + str(len(prefetchLocIdxs)) + " < " + str(numPrefetchLocs)) + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream + with nogil: + err = cyruntime.cudaMemPrefetchBatchAsync(cydptrs_ptr, cysizes.data(), count, cyprefetchLocs, cyprefetchLocIdxs.data(), numPrefetchLocs, flags, cystream) + finally: + if len(prefetchLocs) > 1 and cyprefetchLocs is not NULL: + free(cyprefetchLocs) return (_cudaError_t(err),) {{endif}} @@ -29137,6 +29352,18 @@ def cudaMemDiscardBatchAsync(dptrs : Optional[tuple[Any] | list[Any]], sizes : t """ cdef cyruntime.cudaStream_t cystream + cdef vector[size_t] cysizes + cdef _InputVoidPtrPtrHelper voidStarHelperdptrs + cdef void** cydptrs_ptr + dptrs = [] if dptrs is None else dptrs + pylist = [_HelperInputVoidPtr(pydptrs) for pydptrs in dptrs] + voidStarHelperdptrs = _InputVoidPtrPtrHelper(pylist) + cydptrs_ptr = voidStarHelperdptrs.cptr + if not all(isinstance(_x, (int)) for _x in sizes): + raise TypeError("Argument 'sizes' is not instance of type (expected tuple[int] or list[int]") + cysizes = sizes + if count > len(dptrs): raise RuntimeError("List is too small: " + str(len(dptrs)) + " < " + str(count)) + if count > len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count)) if stream is None: pstream = 0 elif isinstance(stream, (cudaStream_t,driver.CUstream)): @@ -29144,15 +29371,6 @@ def cudaMemDiscardBatchAsync(dptrs : Optional[tuple[Any] | list[Any]], sizes : t else: pstream = int(cudaStream_t(stream)) cystream = pstream - if not all(isinstance(_x, (int)) for _x in sizes): - raise TypeError("Argument 'sizes' is not instance of type (expected tuple[int] or list[int]") - dptrs = [] if dptrs is None else dptrs - pylist = [_HelperInputVoidPtr(pydptrs) for pydptrs in dptrs] - cdef _InputVoidPtrPtrHelper voidStarHelperdptrs = _InputVoidPtrPtrHelper(pylist) - cdef void** cydptrs_ptr = voidStarHelperdptrs.cptr - cdef vector[size_t] cysizes = sizes - if count > len(dptrs): raise RuntimeError("List is too small: " + str(len(dptrs)) + " < " + str(count)) - if count > len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count)) with nogil: err = cyruntime.cudaMemDiscardBatchAsync(cydptrs_ptr, cysizes.data(), count, flags, cystream) return (_cudaError_t(err),) @@ -29235,43 +29453,49 @@ def cudaMemDiscardAndPrefetchBatchAsync(dptrs : Optional[tuple[Any] | list[Any]] """ cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream - if not all(isinstance(_x, (int)) for _x in prefetchLocIdxs): - raise TypeError("Argument 'prefetchLocIdxs' is not instance of type (expected tuple[int] or list[int]") - prefetchLocs = [] if prefetchLocs is None else prefetchLocs - if not all(isinstance(_x, (cudaMemLocation,)) for _x in prefetchLocs): - raise TypeError("Argument 'prefetchLocs' is not instance of type (expected tuple[cyruntime.cudaMemLocation,] or list[cyruntime.cudaMemLocation,]") - if not all(isinstance(_x, (int)) for _x in sizes): - raise TypeError("Argument 'sizes' is not instance of type (expected tuple[int] or list[int]") - dptrs = [] if dptrs is None else dptrs - pylist = [_HelperInputVoidPtr(pydptrs) for pydptrs in dptrs] - cdef _InputVoidPtrPtrHelper voidStarHelperdptrs = _InputVoidPtrPtrHelper(pylist) - cdef void** cydptrs_ptr = voidStarHelperdptrs.cptr - cdef vector[size_t] cysizes = sizes - if count > len(dptrs): raise RuntimeError("List is too small: " + str(len(dptrs)) + " < " + str(count)) - if count > len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count)) + cdef vector[size_t] cyprefetchLocIdxs cdef cyruntime.cudaMemLocation* cyprefetchLocs = NULL - if len(prefetchLocs) > 1: - cyprefetchLocs = calloc(len(prefetchLocs), sizeof(cyruntime.cudaMemLocation)) - if cyprefetchLocs is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(prefetchLocs)) + 'x' + str(sizeof(cyruntime.cudaMemLocation))) - for idx in range(len(prefetchLocs)): - string.memcpy(&cyprefetchLocs[idx], (prefetchLocs[idx])._pvt_ptr, sizeof(cyruntime.cudaMemLocation)) - elif len(prefetchLocs) == 1: - cyprefetchLocs = (prefetchLocs[0])._pvt_ptr - cdef vector[size_t] cyprefetchLocIdxs = prefetchLocIdxs - if numPrefetchLocs > len(prefetchLocs): raise RuntimeError("List is too small: " + str(len(prefetchLocs)) + " < " + str(numPrefetchLocs)) - if numPrefetchLocs > len(prefetchLocIdxs): raise RuntimeError("List is too small: " + str(len(prefetchLocIdxs)) + " < " + str(numPrefetchLocs)) - with nogil: - err = cyruntime.cudaMemDiscardAndPrefetchBatchAsync(cydptrs_ptr, cysizes.data(), count, cyprefetchLocs, cyprefetchLocIdxs.data(), numPrefetchLocs, flags, cystream) - if len(prefetchLocs) > 1 and cyprefetchLocs is not NULL: - free(cyprefetchLocs) + cdef vector[size_t] cysizes + cdef _InputVoidPtrPtrHelper voidStarHelperdptrs + cdef void** cydptrs_ptr + try: + dptrs = [] if dptrs is None else dptrs + pylist = [_HelperInputVoidPtr(pydptrs) for pydptrs in dptrs] + voidStarHelperdptrs = _InputVoidPtrPtrHelper(pylist) + cydptrs_ptr = voidStarHelperdptrs.cptr + if not all(isinstance(_x, (int)) for _x in sizes): + raise TypeError("Argument 'sizes' is not instance of type (expected tuple[int] or list[int]") + cysizes = sizes + if count > len(dptrs): raise RuntimeError("List is too small: " + str(len(dptrs)) + " < " + str(count)) + if count > len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count)) + prefetchLocs = [] if prefetchLocs is None else prefetchLocs + if not all(isinstance(_x, (cudaMemLocation,)) for _x in prefetchLocs): + raise TypeError("Argument 'prefetchLocs' is not instance of type (expected tuple[cyruntime.cudaMemLocation,] or list[cyruntime.cudaMemLocation,]") + if len(prefetchLocs) > 1: + cyprefetchLocs = calloc(len(prefetchLocs), sizeof(cyruntime.cudaMemLocation)) + if cyprefetchLocs is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(prefetchLocs)) + 'x' + str(sizeof(cyruntime.cudaMemLocation))) + for idx in range(len(prefetchLocs)): + string.memcpy(&cyprefetchLocs[idx], (prefetchLocs[idx])._pvt_ptr, sizeof(cyruntime.cudaMemLocation)) + elif len(prefetchLocs) == 1: + cyprefetchLocs = (prefetchLocs[0])._pvt_ptr + if not all(isinstance(_x, (int)) for _x in prefetchLocIdxs): + raise TypeError("Argument 'prefetchLocIdxs' is not instance of type (expected tuple[int] or list[int]") + cyprefetchLocIdxs = prefetchLocIdxs + if numPrefetchLocs > len(prefetchLocs): raise RuntimeError("List is too small: " + str(len(prefetchLocs)) + " < " + str(numPrefetchLocs)) + if numPrefetchLocs > len(prefetchLocIdxs): raise RuntimeError("List is too small: " + str(len(prefetchLocIdxs)) + " < " + str(numPrefetchLocs)) + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream + with nogil: + err = cyruntime.cudaMemDiscardAndPrefetchBatchAsync(cydptrs_ptr, cysizes.data(), count, cyprefetchLocs, cyprefetchLocIdxs.data(), numPrefetchLocs, flags, cystream) + finally: + if len(prefetchLocs) > 1 and cyprefetchLocs is not NULL: + free(cyprefetchLocs) return (_cudaError_t(err),) {{endif}} @@ -29464,12 +29688,16 @@ def cudaMemAdvise(devPtr, size_t count, advice not None : cudaMemoryAdvise, loca -------- :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyPeer`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cudaMemPrefetchAsync`, :py:obj:`~.cuMemAdvise` """ + cdef cyruntime.cudaMemoryAdvise cyadvice cdef _HelperInputVoidPtrStruct cydevPtrHelper - cdef void* cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) - cdef cyruntime.cudaMemoryAdvise cyadvice = int(advice) - with nogil: - err = cyruntime.cudaMemAdvise(cydevPtr, count, cyadvice, location._pvt_ptr[0]) - _helper_input_void_ptr_free(&cydevPtrHelper) + cdef void* cydevPtr + try: + cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) + cyadvice = int(advice) + with nogil: + err = cyruntime.cudaMemAdvise(cydevPtr, count, cyadvice, location._pvt_ptr[0]) + finally: + _helper_input_void_ptr_free(&cydevPtrHelper) return (_cudaError_t(err),) {{endif}} @@ -29611,14 +29839,20 @@ def cudaMemRangeGetAttribute(size_t dataSize, attribute not None : cudaMemRangeA -------- :py:obj:`~.cudaMemRangeGetAttributes`, :py:obj:`~.cudaMemPrefetchAsync`, :py:obj:`~.cudaMemAdvise`, :py:obj:`~.cuMemRangeGetAttribute` """ - cdef _HelperCUmem_range_attribute cydata = _HelperCUmem_range_attribute(attribute, dataSize) - cdef void* cydata_ptr = cydata.cptr - cdef cyruntime.cudaMemRangeAttribute cyattribute = int(attribute) cdef _HelperInputVoidPtrStruct cydevPtrHelper - cdef void* cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) - with nogil: - err = cyruntime.cudaMemRangeGetAttribute(cydata_ptr, dataSize, cyattribute, cydevPtr, count) - _helper_input_void_ptr_free(&cydevPtrHelper) + cdef void* cydevPtr + cdef cyruntime.cudaMemRangeAttribute cyattribute + cdef _HelperCUmem_range_attribute cydata + cdef void* cydata_ptr + try: + cydata = _HelperCUmem_range_attribute(attribute, dataSize) + cydata_ptr = cydata.cptr + cyattribute = int(attribute) + cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) + with nogil: + err = cyruntime.cudaMemRangeGetAttribute(cydata_ptr, dataSize, cyattribute, cydevPtr, count) + finally: + _helper_input_void_ptr_free(&cydevPtrHelper) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, cydata.pyObj()) @@ -29684,23 +29918,30 @@ def cudaMemRangeGetAttributes(dataSizes : tuple[int] | list[int], attributes : O -------- :py:obj:`~.cudaMemRangeGetAttribute`, :py:obj:`~.cudaMemAdvise`, :py:obj:`~.cudaMemPrefetchAsync`, :py:obj:`~.cuMemRangeGetAttributes` """ - attributes = [] if attributes is None else attributes - if not all(isinstance(_x, (cudaMemRangeAttribute)) for _x in attributes): - raise TypeError("Argument 'attributes' is not instance of type (expected tuple[cyruntime.cudaMemRangeAttribute] or list[cyruntime.cudaMemRangeAttribute]") - if not all(isinstance(_x, (int)) for _x in dataSizes): - raise TypeError("Argument 'dataSizes' is not instance of type (expected tuple[int] or list[int]") - pylist = [_HelperCUmem_range_attribute(pyattributes, pydataSizes) for (pyattributes, pydataSizes) in zip(attributes, dataSizes)] - cdef _InputVoidPtrPtrHelper voidStarHelperdata = _InputVoidPtrPtrHelper(pylist) - cdef void** cyvoidStarHelper_ptr = voidStarHelperdata.cptr - cdef vector[size_t] cydataSizes = dataSizes - cdef vector[cyruntime.cudaMemRangeAttribute] cyattributes = [int(pyattributes) for pyattributes in (attributes)] - if numAttributes > len(dataSizes): raise RuntimeError("List is too small: " + str(len(dataSizes)) + " < " + str(numAttributes)) - if numAttributes > len(attributes): raise RuntimeError("List is too small: " + str(len(attributes)) + " < " + str(numAttributes)) cdef _HelperInputVoidPtrStruct cydevPtrHelper - cdef void* cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) - with nogil: - err = cyruntime.cudaMemRangeGetAttributes(cyvoidStarHelper_ptr, cydataSizes.data(), cyattributes.data(), numAttributes, cydevPtr, count) - _helper_input_void_ptr_free(&cydevPtrHelper) + cdef void* cydevPtr + cdef vector[cyruntime.cudaMemRangeAttribute] cyattributes + cdef vector[size_t] cydataSizes + cdef _InputVoidPtrPtrHelper voidStarHelperdata + cdef void** cyvoidStarHelper_ptr + try: + pylist = [_HelperCUmem_range_attribute(pyattributes, pydataSizes) for (pyattributes, pydataSizes) in zip(attributes, dataSizes)] + voidStarHelperdata = _InputVoidPtrPtrHelper(pylist) + cyvoidStarHelper_ptr = voidStarHelperdata.cptr + if not all(isinstance(_x, (int)) for _x in dataSizes): + raise TypeError("Argument 'dataSizes' is not instance of type (expected tuple[int] or list[int]") + cydataSizes = dataSizes + attributes = [] if attributes is None else attributes + if not all(isinstance(_x, (cudaMemRangeAttribute)) for _x in attributes): + raise TypeError("Argument 'attributes' is not instance of type (expected tuple[cyruntime.cudaMemRangeAttribute] or list[cyruntime.cudaMemRangeAttribute]") + cyattributes = attributes + if numAttributes > len(dataSizes): raise RuntimeError("List is too small: " + str(len(dataSizes)) + " < " + str(numAttributes)) + if numAttributes > len(attributes): raise RuntimeError("List is too small: " + str(len(attributes)) + " < " + str(numAttributes)) + cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) + with nogil: + err = cyruntime.cudaMemRangeGetAttributes(cyvoidStarHelper_ptr, cydataSizes.data(), cyattributes.data(), numAttributes, cydevPtr, count) + finally: + _helper_input_void_ptr_free(&cydevPtrHelper) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, [obj.pyObj() for obj in pylist]) @@ -29749,20 +29990,24 @@ def cudaMemcpyToArray(dst, size_t wOffset, size_t hOffset, src, size_t count, ki -------- :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpyFromArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpyArrayToArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpyToArrayAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpyFromArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyDtoA` """ - cdef cyruntime.cudaArray_t cydst - if dst is None: - pdst = 0 - elif isinstance(dst, (cudaArray_t,)): - pdst = int(dst) - else: - pdst = int(cudaArray_t(dst)) - cydst = pdst + cdef cyruntime.cudaMemcpyKind cykind cdef _HelperInputVoidPtrStruct cysrcHelper - cdef void* cysrc = _helper_input_void_ptr(src, &cysrcHelper) - cdef cyruntime.cudaMemcpyKind cykind = int(kind) - with nogil: - err = cyruntime.cudaMemcpyToArray(cydst, wOffset, hOffset, cysrc, count, cykind) - _helper_input_void_ptr_free(&cysrcHelper) + cdef void* cysrc + cdef cyruntime.cudaArray_t cydst + try: + if dst is None: + pdst = 0 + elif isinstance(dst, (cudaArray_t,)): + pdst = int(dst) + else: + pdst = int(cudaArray_t(dst)) + cydst = pdst + cysrc = _helper_input_void_ptr(src, &cysrcHelper) + cykind = int(kind) + with nogil: + err = cyruntime.cudaMemcpyToArray(cydst, wOffset, hOffset, cysrc, count, cykind) + finally: + _helper_input_void_ptr_free(&cysrcHelper) return (_cudaError_t(err),) {{endif}} @@ -29809,20 +30054,24 @@ def cudaMemcpyFromArray(dst, src, size_t wOffset, size_t hOffset, size_t count, -------- :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpyToArray`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpyArrayToArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpyToArrayAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpyFromArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoD` """ + cdef cyruntime.cudaMemcpyKind cykind cdef cyruntime.cudaArray_const_t cysrc - if src is None: - psrc = 0 - elif isinstance(src, (cudaArray_const_t,)): - psrc = int(src) - else: - psrc = int(cudaArray_const_t(src)) - cysrc = psrc cdef _HelperInputVoidPtrStruct cydstHelper - cdef void* cydst = _helper_input_void_ptr(dst, &cydstHelper) - cdef cyruntime.cudaMemcpyKind cykind = int(kind) - with nogil: - err = cyruntime.cudaMemcpyFromArray(cydst, cysrc, wOffset, hOffset, count, cykind) - _helper_input_void_ptr_free(&cydstHelper) + cdef void* cydst + try: + cydst = _helper_input_void_ptr(dst, &cydstHelper) + if src is None: + psrc = 0 + elif isinstance(src, (cudaArray_const_t,)): + psrc = int(src) + else: + psrc = int(cudaArray_const_t(src)) + cysrc = psrc + cykind = int(kind) + with nogil: + err = cyruntime.cudaMemcpyFromArray(cydst, cysrc, wOffset, hOffset, count, cykind) + finally: + _helper_input_void_ptr_free(&cydstHelper) return (_cudaError_t(err),) {{endif}} @@ -29874,14 +30123,8 @@ def cudaMemcpyArrayToArray(dst, size_t wOffsetDst, size_t hOffsetDst, src, size_ -------- :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpyToArray`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpyFromArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpyToArrayAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpyFromArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyAtoA` """ + cdef cyruntime.cudaMemcpyKind cykind cdef cyruntime.cudaArray_const_t cysrc - if src is None: - psrc = 0 - elif isinstance(src, (cudaArray_const_t,)): - psrc = int(src) - else: - psrc = int(cudaArray_const_t(src)) - cysrc = psrc cdef cyruntime.cudaArray_t cydst if dst is None: pdst = 0 @@ -29890,7 +30133,14 @@ def cudaMemcpyArrayToArray(dst, size_t wOffsetDst, size_t hOffsetDst, src, size_ else: pdst = int(cudaArray_t(dst)) cydst = pdst - cdef cyruntime.cudaMemcpyKind cykind = int(kind) + if src is None: + psrc = 0 + elif isinstance(src, (cudaArray_const_t,)): + psrc = int(src) + else: + psrc = int(cudaArray_const_t(src)) + cysrc = psrc + cykind = int(kind) with nogil: err = cyruntime.cudaMemcpyArrayToArray(cydst, wOffsetDst, hOffsetDst, cysrc, wOffsetSrc, hOffsetSrc, count, cykind) return (_cudaError_t(err),) @@ -29949,27 +30199,31 @@ def cudaMemcpyToArrayAsync(dst, size_t wOffset, size_t hOffset, src, size_t coun :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpyToArray`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpyFromArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpyArrayToArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpyFromArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpy2DAsync` """ cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream - cdef cyruntime.cudaArray_t cydst - if dst is None: - pdst = 0 - elif isinstance(dst, (cudaArray_t,)): - pdst = int(dst) - else: - pdst = int(cudaArray_t(dst)) - cydst = pdst + cdef cyruntime.cudaMemcpyKind cykind cdef _HelperInputVoidPtrStruct cysrcHelper - cdef void* cysrc = _helper_input_void_ptr(src, &cysrcHelper) - cdef cyruntime.cudaMemcpyKind cykind = int(kind) - with nogil: - err = cyruntime.cudaMemcpyToArrayAsync(cydst, wOffset, hOffset, cysrc, count, cykind, cystream) - _helper_input_void_ptr_free(&cysrcHelper) + cdef void* cysrc + cdef cyruntime.cudaArray_t cydst + try: + if dst is None: + pdst = 0 + elif isinstance(dst, (cudaArray_t,)): + pdst = int(dst) + else: + pdst = int(cudaArray_t(dst)) + cydst = pdst + cysrc = _helper_input_void_ptr(src, &cysrcHelper) + cykind = int(kind) + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream + with nogil: + err = cyruntime.cudaMemcpyToArrayAsync(cydst, wOffset, hOffset, cysrc, count, cykind, cystream) + finally: + _helper_input_void_ptr_free(&cysrcHelper) return (_cudaError_t(err),) {{endif}} @@ -30026,27 +30280,31 @@ def cudaMemcpyFromArrayAsync(dst, src, size_t wOffset, size_t hOffset, size_t co :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpyToArray`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpyFromArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpyArrayToArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpyToArrayAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpy2DAsync` """ cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream + cdef cyruntime.cudaMemcpyKind cykind cdef cyruntime.cudaArray_const_t cysrc - if src is None: - psrc = 0 - elif isinstance(src, (cudaArray_const_t,)): - psrc = int(src) - else: - psrc = int(cudaArray_const_t(src)) - cysrc = psrc cdef _HelperInputVoidPtrStruct cydstHelper - cdef void* cydst = _helper_input_void_ptr(dst, &cydstHelper) - cdef cyruntime.cudaMemcpyKind cykind = int(kind) - with nogil: - err = cyruntime.cudaMemcpyFromArrayAsync(cydst, cysrc, wOffset, hOffset, count, cykind, cystream) - _helper_input_void_ptr_free(&cydstHelper) + cdef void* cydst + try: + cydst = _helper_input_void_ptr(dst, &cydstHelper) + if src is None: + psrc = 0 + elif isinstance(src, (cudaArray_const_t,)): + psrc = int(src) + else: + psrc = int(cudaArray_const_t(src)) + cysrc = psrc + cykind = int(kind) + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream + with nogil: + err = cyruntime.cudaMemcpyFromArrayAsync(cydst, cysrc, wOffset, hOffset, count, cykind, cystream) + finally: + _helper_input_void_ptr_free(&cydstHelper) return (_cudaError_t(err),) {{endif}} @@ -30090,6 +30348,7 @@ def cudaMallocAsync(size_t size, hStream): During stream capture, this function results in the creation of an allocation node. In this case, the allocation is owned by the graph instead of the memory pool. The memory pool's properties are used to set the node's creation parameters. """ cdef cyruntime.cudaStream_t cyhStream + cdef void_ptr devPtr = 0 if hStream is None: phStream = 0 elif isinstance(hStream, (cudaStream_t,driver.CUstream)): @@ -30097,7 +30356,6 @@ def cudaMallocAsync(size_t size, hStream): else: phStream = int(cudaStream_t(hStream)) cyhStream = phStream - cdef void_ptr devPtr = 0 with nogil: err = cyruntime.cudaMallocAsync(&devPtr, size, cyhStream) if err != cyruntime.cudaSuccess: @@ -30137,18 +30395,21 @@ def cudaFreeAsync(devPtr, hStream): During stream capture, this function results in the creation of a free node and must therefore be passed the address of a graph allocation. """ cdef cyruntime.cudaStream_t cyhStream - if hStream is None: - phStream = 0 - elif isinstance(hStream, (cudaStream_t,driver.CUstream)): - phStream = int(hStream) - else: - phStream = int(cudaStream_t(hStream)) - cyhStream = phStream cdef _HelperInputVoidPtrStruct cydevPtrHelper - cdef void* cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) - with nogil: - err = cyruntime.cudaFreeAsync(cydevPtr, cyhStream) - _helper_input_void_ptr_free(&cydevPtrHelper) + cdef void* cydevPtr + try: + cydevPtr = _helper_input_void_ptr(devPtr, &cydevPtrHelper) + if hStream is None: + phStream = 0 + elif isinstance(hStream, (cudaStream_t,driver.CUstream)): + phStream = int(hStream) + else: + phStream = int(cudaStream_t(hStream)) + cyhStream = phStream + with nogil: + err = cyruntime.cudaFreeAsync(cydevPtr, cyhStream) + finally: + _helper_input_void_ptr_free(&cydevPtrHelper) return (_cudaError_t(err),) {{endif}} @@ -30262,6 +30523,9 @@ def cudaMemPoolSetAttribute(memPool, attr not None : cudaMemPoolAttr, value): -------- :py:obj:`~.cuMemPoolSetAttribute`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaDeviceGetMemPool`, :py:obj:`~.cudaMemPoolCreate` """ + cdef _HelperCUmemPool_attribute cyvalue + cdef void *cyvalueptr + cdef cyruntime.cudaMemPoolAttr cyattr cdef cyruntime.cudaMemPool_t cymemPool if memPool is None: pmemPool = 0 @@ -30270,9 +30534,9 @@ def cudaMemPoolSetAttribute(memPool, attr not None : cudaMemPoolAttr, value): else: pmemPool = int(cudaMemPool_t(memPool)) cymemPool = pmemPool - cdef cyruntime.cudaMemPoolAttr cyattr = int(attr) - cdef _HelperCUmemPool_attribute cyvalue = _HelperCUmemPool_attribute(attr, value, is_getter=False) - cdef void* cyvalue_ptr = cyvalue.cptr + cyattr = int(attr) + cyvalue = _HelperCUmemPool_attribute(attr, value, is_getter=False) + cyvalue_ptr = cyvalue.cptr with nogil: err = cyruntime.cudaMemPoolSetAttribute(cymemPool, cyattr, cyvalue_ptr) return (_cudaError_t(err),) @@ -30344,6 +30608,9 @@ def cudaMemPoolGetAttribute(memPool, attr not None : cudaMemPoolAttr): -------- :py:obj:`~.cuMemPoolGetAttribute`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaDeviceGetMemPool`, :py:obj:`~.cudaMemPoolCreate` """ + cdef _HelperCUmemPool_attribute cyvalue + cdef void* cyvalue_ptr + cdef cyruntime.cudaMemPoolAttr cyattr cdef cyruntime.cudaMemPool_t cymemPool if memPool is None: pmemPool = 0 @@ -30352,9 +30619,9 @@ def cudaMemPoolGetAttribute(memPool, attr not None : cudaMemPoolAttr): else: pmemPool = int(cudaMemPool_t(memPool)) cymemPool = pmemPool - cdef cyruntime.cudaMemPoolAttr cyattr = int(attr) - cdef _HelperCUmemPool_attribute cyvalue = _HelperCUmemPool_attribute(attr, 0, is_getter=True) - cdef void* cyvalue_ptr = cyvalue.cptr + cyattr = int(attr) + cyvalue = _HelperCUmemPool_attribute(attr, 0, is_getter=True) + cyvalue_ptr = cyvalue.cptr with nogil: err = cyruntime.cudaMemPoolGetAttribute(cymemPool, cyattr, cyvalue_ptr) if err != cyruntime.cudaSuccess: @@ -30387,31 +30654,33 @@ def cudaMemPoolSetAccess(memPool, descList : Optional[tuple[cudaMemAccessDesc] | -------- :py:obj:`~.cuMemPoolSetAccess`, :py:obj:`~.cudaMemPoolGetAccess`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync` """ - descList = [] if descList is None else descList - if not all(isinstance(_x, (cudaMemAccessDesc,)) for _x in descList): - raise TypeError("Argument 'descList' is not instance of type (expected tuple[cyruntime.cudaMemAccessDesc,] or list[cyruntime.cudaMemAccessDesc,]") - cdef cyruntime.cudaMemPool_t cymemPool - if memPool is None: - pmemPool = 0 - elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)): - pmemPool = int(memPool) - else: - pmemPool = int(cudaMemPool_t(memPool)) - cymemPool = pmemPool cdef cyruntime.cudaMemAccessDesc* cydescList = NULL - if len(descList) > 1: - cydescList = calloc(len(descList), sizeof(cyruntime.cudaMemAccessDesc)) - if cydescList is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(descList)) + 'x' + str(sizeof(cyruntime.cudaMemAccessDesc))) - for idx in range(len(descList)): - string.memcpy(&cydescList[idx], (descList[idx])._pvt_ptr, sizeof(cyruntime.cudaMemAccessDesc)) - elif len(descList) == 1: - cydescList = (descList[0])._pvt_ptr - if count > len(descList): raise RuntimeError("List is too small: " + str(len(descList)) + " < " + str(count)) - with nogil: - err = cyruntime.cudaMemPoolSetAccess(cymemPool, cydescList, count) - if len(descList) > 1 and cydescList is not NULL: - free(cydescList) + cdef cyruntime.cudaMemPool_t cymemPool + try: + if memPool is None: + pmemPool = 0 + elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)): + pmemPool = int(memPool) + else: + pmemPool = int(cudaMemPool_t(memPool)) + cymemPool = pmemPool + descList = [] if descList is None else descList + if not all(isinstance(_x, (cudaMemAccessDesc,)) for _x in descList): + raise TypeError("Argument 'descList' is not instance of type (expected tuple[cyruntime.cudaMemAccessDesc,] or list[cyruntime.cudaMemAccessDesc,]") + if len(descList) > 1: + cydescList = calloc(len(descList), sizeof(cyruntime.cudaMemAccessDesc)) + if cydescList is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(descList)) + 'x' + str(sizeof(cyruntime.cudaMemAccessDesc))) + for idx in range(len(descList)): + string.memcpy(&cydescList[idx], (descList[idx])._pvt_ptr, sizeof(cyruntime.cudaMemAccessDesc)) + elif len(descList) == 1: + cydescList = (descList[0])._pvt_ptr + if count > len(descList): raise RuntimeError("List is too small: " + str(len(descList)) + " < " + str(count)) + with nogil: + err = cyruntime.cudaMemPoolSetAccess(cymemPool, cydescList, count) + finally: + if len(descList) > 1 and cydescList is not NULL: + free(cydescList) return (_cudaError_t(err),) {{endif}} @@ -30442,7 +30711,9 @@ def cudaMemPoolGetAccess(memPool, location : Optional[cudaMemLocation]): -------- :py:obj:`~.cuMemPoolGetAccess`, :py:obj:`~.cudaMemPoolSetAccess` """ + cdef cyruntime.cudaMemLocation* cylocation_ptr cdef cyruntime.cudaMemPool_t cymemPool + cdef cyruntime.cudaMemAccessFlags flags if memPool is None: pmemPool = 0 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)): @@ -30450,8 +30721,7 @@ def cudaMemPoolGetAccess(memPool, location : Optional[cudaMemLocation]): else: pmemPool = int(cudaMemPool_t(memPool)) cymemPool = pmemPool - cdef cyruntime.cudaMemAccessFlags flags - cdef cyruntime.cudaMemLocation* cylocation_ptr = location._pvt_ptr if location is not None else NULL + cylocation_ptr = location._pvt_ptr if location is not None else NULL with nogil: err = cyruntime.cudaMemPoolGetAccess(&flags, cymemPool, cylocation_ptr) if err != cyruntime.cudaSuccess: @@ -30550,8 +30820,10 @@ def cudaMemPoolCreate(poolProps : Optional[cudaMemPoolProps]): ----- Specifying :py:obj:`~.cudaMemHandleTypeNone` creates a memory pool that will not support IPC. """ - cdef cudaMemPool_t memPool = cudaMemPool_t() - cdef cyruntime.cudaMemPoolProps* cypoolProps_ptr = poolProps._pvt_ptr if poolProps is not None else NULL + cdef cyruntime.cudaMemPoolProps* cypoolProps_ptr + cdef cudaMemPool_t memPool + memPool = cudaMemPool_t() + cypoolProps_ptr = poolProps._pvt_ptr if poolProps is not None else NULL with nogil: err = cyruntime.cudaMemPoolCreate(memPool._pvt_ptr, cypoolProps_ptr) if err != cyruntime.cudaSuccess: @@ -30640,9 +30912,12 @@ def cudaMemGetDefaultMemPool(location : Optional[cudaMemLocation], typename not -------- :py:obj:`~.cuMemAllocAsync`, :py:obj:`~.cuMemPoolTrimTo`, :py:obj:`~.cuMemPoolGetAttribute`, :py:obj:`~.cuMemPoolSetAttribute`, cuMemPoolSetAccess, :py:obj:`~.cuMemGetMemPool`, :py:obj:`~.cuMemPoolCreate` """ - cdef cudaMemPool_t memPool = cudaMemPool_t() - cdef cyruntime.cudaMemLocation* cylocation_ptr = location._pvt_ptr if location is not None else NULL - cdef cyruntime.cudaMemAllocationType cytypename = int(typename) + cdef cyruntime.cudaMemAllocationType cytypename + cdef cyruntime.cudaMemLocation* cylocation_ptr + cdef cudaMemPool_t memPool + memPool = cudaMemPool_t() + cylocation_ptr = location._pvt_ptr if location is not None else NULL + cytypename = int(typename) with nogil: err = cyruntime.cudaMemGetDefaultMemPool(memPool._pvt_ptr, cylocation_ptr, cytypename) if err != cyruntime.cudaSuccess: @@ -30694,9 +30969,12 @@ def cudaMemGetMemPool(location : Optional[cudaMemLocation], typename not None : -------- :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cuMemPoolCreate`, :py:obj:`~.cuDeviceSetMemPool`, :py:obj:`~.cuMemSetMemPool` """ - cdef cudaMemPool_t memPool = cudaMemPool_t() - cdef cyruntime.cudaMemLocation* cylocation_ptr = location._pvt_ptr if location is not None else NULL - cdef cyruntime.cudaMemAllocationType cytypename = int(typename) + cdef cyruntime.cudaMemAllocationType cytypename + cdef cyruntime.cudaMemLocation* cylocation_ptr + cdef cudaMemPool_t memPool + memPool = cudaMemPool_t() + cylocation_ptr = location._pvt_ptr if location is not None else NULL + cytypename = int(typename) with nogil: err = cyruntime.cudaMemGetMemPool(memPool._pvt_ptr, cylocation_ptr, cytypename) if err != cyruntime.cudaSuccess: @@ -30757,6 +31035,10 @@ def cudaMemSetMemPool(location : Optional[cudaMemLocation], typename not None : Use :py:obj:`~.cudaMallocFromPoolAsync` to specify asynchronous allocations from a device different than the one the stream runs on. """ cdef cyruntime.cudaMemPool_t cymemPool + cdef cyruntime.cudaMemAllocationType cytypename + cdef cyruntime.cudaMemLocation* cylocation_ptr + cylocation_ptr = location._pvt_ptr if location is not None else NULL + cytypename = int(typename) if memPool is None: pmemPool = 0 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)): @@ -30764,8 +31046,6 @@ def cudaMemSetMemPool(location : Optional[cudaMemLocation], typename not None : else: pmemPool = int(cudaMemPool_t(memPool)) cymemPool = pmemPool - cdef cyruntime.cudaMemLocation* cylocation_ptr = location._pvt_ptr if location is not None else NULL - cdef cyruntime.cudaMemAllocationType cytypename = int(typename) with nogil: err = cyruntime.cudaMemSetMemPool(cylocation_ptr, cytypename, cymemPool) return (_cudaError_t(err),) @@ -30807,14 +31087,8 @@ def cudaMallocFromPoolAsync(size_t size, memPool, stream): During stream capture, this function results in the creation of an allocation node. In this case, the allocation is owned by the graph instead of the memory pool. The memory pool's properties are used to set the node's creation parameters. """ cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream cdef cyruntime.cudaMemPool_t cymemPool + cdef void_ptr ptr = 0 if memPool is None: pmemPool = 0 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)): @@ -30822,7 +31096,13 @@ def cudaMallocFromPoolAsync(size_t size, memPool, stream): else: pmemPool = int(cudaMemPool_t(memPool)) cymemPool = pmemPool - cdef void_ptr ptr = 0 + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream with nogil: err = cyruntime.cudaMallocFromPoolAsync(&ptr, size, cymemPool, cystream) if err != cyruntime.cudaSuccess: @@ -30869,7 +31149,12 @@ def cudaMemPoolExportToShareableHandle(memPool, handleType not None : cudaMemAll ----- : To create an IPC capable mempool, create a mempool with a CUmemAllocationHandleType other than cudaMemHandleTypeNone. """ + cdef cyruntime.cudaMemAllocationHandleType cyhandleType cdef cyruntime.cudaMemPool_t cymemPool + cdef _HelperCUmemAllocationHandleType cyshareableHandle + cdef void* cyshareableHandle_ptr + cyshareableHandle = _HelperCUmemAllocationHandleType(handleType) + cyshareableHandle_ptr = cyshareableHandle.cptr if memPool is None: pmemPool = 0 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)): @@ -30877,9 +31162,7 @@ def cudaMemPoolExportToShareableHandle(memPool, handleType not None : cudaMemAll else: pmemPool = int(cudaMemPool_t(memPool)) cymemPool = pmemPool - cdef _HelperCUmemAllocationHandleType cyshareableHandle = _HelperCUmemAllocationHandleType(handleType) - cdef void* cyshareableHandle_ptr = cyshareableHandle.cptr - cdef cyruntime.cudaMemAllocationHandleType cyhandleType = int(handleType) + cyhandleType = int(handleType) with nogil: err = cyruntime.cudaMemPoolExportToShareableHandle(cyshareableHandle_ptr, cymemPool, cyhandleType, flags) if err != cyruntime.cudaSuccess: @@ -30920,13 +31203,18 @@ def cudaMemPoolImportFromShareableHandle(shareableHandle, handleType not None : ----- Imported memory pools do not support creating new allocations. As such imported memory pools may not be used in :py:obj:`~.cudaDeviceSetMemPool` or :py:obj:`~.cudaMallocFromPoolAsync` calls. """ - cdef cudaMemPool_t memPool = cudaMemPool_t() + cdef cyruntime.cudaMemAllocationHandleType cyhandleType cdef _HelperInputVoidPtrStruct cyshareableHandleHelper - cdef void* cyshareableHandle = _helper_input_void_ptr(shareableHandle, &cyshareableHandleHelper) - cdef cyruntime.cudaMemAllocationHandleType cyhandleType = int(handleType) - with nogil: - err = cyruntime.cudaMemPoolImportFromShareableHandle(memPool._pvt_ptr, cyshareableHandle, cyhandleType, flags) - _helper_input_void_ptr_free(&cyshareableHandleHelper) + cdef void* cyshareableHandle + cdef cudaMemPool_t memPool + try: + memPool = cudaMemPool_t() + cyshareableHandle = _helper_input_void_ptr(shareableHandle, &cyshareableHandleHelper) + cyhandleType = int(handleType) + with nogil: + err = cyruntime.cudaMemPoolImportFromShareableHandle(memPool._pvt_ptr, cyshareableHandle, cyhandleType, flags) + finally: + _helper_input_void_ptr_free(&cyshareableHandleHelper) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, memPool) @@ -30959,12 +31247,16 @@ def cudaMemPoolExportPointer(ptr): -------- :py:obj:`~.cuMemPoolExportPointer`, :py:obj:`~.cudaMemPoolExportToShareableHandle`, :py:obj:`~.cudaMemPoolImportFromShareableHandle`, :py:obj:`~.cudaMemPoolImportPointer` """ - cdef cudaMemPoolPtrExportData exportData = cudaMemPoolPtrExportData() cdef _HelperInputVoidPtrStruct cyptrHelper - cdef void* cyptr = _helper_input_void_ptr(ptr, &cyptrHelper) - with nogil: - err = cyruntime.cudaMemPoolExportPointer(exportData._pvt_ptr, cyptr) - _helper_input_void_ptr_free(&cyptrHelper) + cdef void* cyptr + cdef cudaMemPoolPtrExportData exportData + try: + exportData = cudaMemPoolPtrExportData() + cyptr = _helper_input_void_ptr(ptr, &cyptrHelper) + with nogil: + err = cyruntime.cudaMemPoolExportPointer(exportData._pvt_ptr, cyptr) + finally: + _helper_input_void_ptr_free(&cyptrHelper) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, exportData) @@ -31006,7 +31298,9 @@ def cudaMemPoolImportPointer(memPool, exportData : Optional[cudaMemPoolPtrExport ----- The :py:obj:`~.cudaFreeAsync` api may be used in the exporting process before the :py:obj:`~.cudaFreeAsync` operation completes in its stream as long as the :py:obj:`~.cudaFreeAsync` in the exporting process specifies a stream with a stream dependency on the importing process's :py:obj:`~.cudaFreeAsync`. """ + cdef cyruntime.cudaMemPoolPtrExportData* cyexportData_ptr cdef cyruntime.cudaMemPool_t cymemPool + cdef void_ptr ptr = 0 if memPool is None: pmemPool = 0 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)): @@ -31014,8 +31308,7 @@ def cudaMemPoolImportPointer(memPool, exportData : Optional[cudaMemPoolPtrExport else: pmemPool = int(cudaMemPool_t(memPool)) cymemPool = pmemPool - cdef void_ptr ptr = 0 - cdef cyruntime.cudaMemPoolPtrExportData* cyexportData_ptr = exportData._pvt_ptr if exportData is not None else NULL + cyexportData_ptr = exportData._pvt_ptr if exportData is not None else NULL with nogil: err = cyruntime.cudaMemPoolImportPointer(&ptr, cymemPool, cyexportData_ptr) if err != cyruntime.cudaSuccess: @@ -31085,12 +31378,16 @@ def cudaPointerGetAttributes(ptr): ----- In CUDA 11.0 forward passing host pointer will return :py:obj:`~.cudaMemoryTypeUnregistered` in :py:obj:`~.cudaPointerAttributes.type` and call will return :py:obj:`~.cudaSuccess`. """ - cdef cudaPointerAttributes attributes = cudaPointerAttributes() cdef _HelperInputVoidPtrStruct cyptrHelper - cdef void* cyptr = _helper_input_void_ptr(ptr, &cyptrHelper) - with nogil: - err = cyruntime.cudaPointerGetAttributes(attributes._pvt_ptr, cyptr) - _helper_input_void_ptr_free(&cyptrHelper) + cdef void* cyptr + cdef cudaPointerAttributes attributes + try: + attributes = cudaPointerAttributes() + cyptr = _helper_input_void_ptr(ptr, &cyptrHelper) + with nogil: + err = cyruntime.cudaPointerGetAttributes(attributes._pvt_ptr, cyptr) + finally: + _helper_input_void_ptr_free(&cyptrHelper) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, attributes) @@ -31351,13 +31648,6 @@ def cudaGraphicsMapResources(int count, resources, stream): :py:obj:`~.cudaGraphicsResourceGetMappedPointer`, :py:obj:`~.cudaGraphicsSubResourceGetMappedArray`, :py:obj:`~.cudaGraphicsUnmapResources`, :py:obj:`~.cuGraphicsMapResources` """ cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream cdef cyruntime.cudaGraphicsResource_t *cyresources if resources is None: cyresources = NULL @@ -31368,6 +31658,13 @@ def cudaGraphicsMapResources(int count, resources, stream): cyresources = resources else: raise TypeError("Argument 'resources' is not instance of type (expected , found " + str(type(resources))) + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream with nogil: err = cyruntime.cudaGraphicsMapResources(count, cyresources, cystream) return (_cudaError_t(err),) @@ -31412,13 +31709,6 @@ def cudaGraphicsUnmapResources(int count, resources, stream): :py:obj:`~.cudaGraphicsMapResources`, :py:obj:`~.cuGraphicsUnmapResources` """ cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream cdef cyruntime.cudaGraphicsResource_t *cyresources if resources is None: cyresources = NULL @@ -31429,6 +31719,13 @@ def cudaGraphicsUnmapResources(int count, resources, stream): cyresources = resources else: raise TypeError("Argument 'resources' is not instance of type (expected , found " + str(type(resources))) + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream with nogil: err = cyruntime.cudaGraphicsUnmapResources(count, cyresources, cystream) return (_cudaError_t(err),) @@ -31464,6 +31761,8 @@ def cudaGraphicsResourceGetMappedPointer(resource): None """ cdef cyruntime.cudaGraphicsResource_t cyresource + cdef size_t size = 0 + cdef void_ptr devPtr = 0 if resource is None: presource = 0 elif isinstance(resource, (cudaGraphicsResource_t,)): @@ -31471,8 +31770,6 @@ def cudaGraphicsResourceGetMappedPointer(resource): else: presource = int(cudaGraphicsResource_t(resource)) cyresource = presource - cdef void_ptr devPtr = 0 - cdef size_t size = 0 with nogil: err = cyruntime.cudaGraphicsResourceGetMappedPointer(&devPtr, &size, cyresource) if err != cyruntime.cudaSuccess: @@ -31522,6 +31819,8 @@ def cudaGraphicsSubResourceGetMappedArray(resource, unsigned int arrayIndex, uns :py:obj:`~.cudaGraphicsResourceGetMappedPointer`, :py:obj:`~.cuGraphicsSubResourceGetMappedArray` """ cdef cyruntime.cudaGraphicsResource_t cyresource + cdef cudaArray_t array + array = cudaArray_t() if resource is None: presource = 0 elif isinstance(resource, (cudaGraphicsResource_t,)): @@ -31529,7 +31828,6 @@ def cudaGraphicsSubResourceGetMappedArray(resource, unsigned int arrayIndex, uns else: presource = int(cudaGraphicsResource_t(resource)) cyresource = presource - cdef cudaArray_t array = cudaArray_t() with nogil: err = cyruntime.cudaGraphicsSubResourceGetMappedArray(array._pvt_ptr, cyresource, arrayIndex, mipLevel) if err != cyruntime.cudaSuccess: @@ -31568,6 +31866,8 @@ def cudaGraphicsResourceGetMappedMipmappedArray(resource): :py:obj:`~.cudaGraphicsResourceGetMappedPointer`, :py:obj:`~.cuGraphicsResourceGetMappedMipmappedArray` """ cdef cyruntime.cudaGraphicsResource_t cyresource + cdef cudaMipmappedArray_t mipmappedArray + mipmappedArray = cudaMipmappedArray_t() if resource is None: presource = 0 elif isinstance(resource, (cudaGraphicsResource_t,)): @@ -31575,7 +31875,6 @@ def cudaGraphicsResourceGetMappedMipmappedArray(resource): else: presource = int(cudaGraphicsResource_t(resource)) cyresource = presource - cdef cudaMipmappedArray_t mipmappedArray = cudaMipmappedArray_t() with nogil: err = cyruntime.cudaGraphicsResourceGetMappedMipmappedArray(mipmappedArray._pvt_ptr, cyresource) if err != cyruntime.cudaSuccess: @@ -31608,6 +31907,8 @@ def cudaGetChannelDesc(array): :py:obj:`~.cudaCreateChannelDesc (C API)`, :py:obj:`~.cudaCreateTextureObject`, :py:obj:`~.cudaCreateSurfaceObject` """ cdef cyruntime.cudaArray_const_t cyarray + cdef cudaChannelFormatDesc desc + desc = cudaChannelFormatDesc() if array is None: parray = 0 elif isinstance(array, (cudaArray_const_t,)): @@ -31615,7 +31916,6 @@ def cudaGetChannelDesc(array): else: parray = int(cudaArray_const_t(array)) cyarray = parray - cdef cudaChannelFormatDesc desc = cudaChannelFormatDesc() with nogil: err = cyruntime.cudaGetChannelDesc(desc._pvt_ptr, cyarray) if err != cyruntime.cudaSuccess: @@ -31664,11 +31964,15 @@ def cudaCreateChannelDesc(int x, int y, int z, int w, f not None : cudaChannelFo -------- cudaCreateChannelDesc (C++ API), :py:obj:`~.cudaGetChannelDesc`, :py:obj:`~.cudaCreateTextureObject`, :py:obj:`~.cudaCreateSurfaceObject` """ - cdef cyruntime.cudaChannelFormatKind cyf = int(f) - with nogil: - err = cyruntime.cudaCreateChannelDesc(x, y, z, w, cyf) - cdef cudaChannelFormatDesc wrapper = cudaChannelFormatDesc() - wrapper._pvt_ptr[0] = err + cdef cyruntime.cudaChannelFormatKind cyf + cdef cudaChannelFormatDesc wrapper + try: + cyf = int(f) + with nogil: + err = cyruntime.cudaCreateChannelDesc(x, y, z, w, cyf) + finally: + wrapper = cudaChannelFormatDesc() + wrapper._pvt_ptr[0] = err return (cudaError_t.cudaSuccess, wrapper) {{endif}} @@ -31906,10 +32210,14 @@ def cudaCreateTextureObject(pResDesc : Optional[cudaResourceDesc], pTexDesc : Op -------- :py:obj:`~.cudaDestroyTextureObject`, :py:obj:`~.cuTexObjectCreate` """ - cdef cudaTextureObject_t pTexObject = cudaTextureObject_t() - cdef cyruntime.cudaResourceDesc* cypResDesc_ptr = pResDesc._pvt_ptr if pResDesc is not None else NULL - cdef cyruntime.cudaTextureDesc* cypTexDesc_ptr = pTexDesc._pvt_ptr if pTexDesc is not None else NULL - cdef cyruntime.cudaResourceViewDesc* cypResViewDesc_ptr = pResViewDesc._pvt_ptr if pResViewDesc is not None else NULL + cdef cyruntime.cudaResourceViewDesc* cypResViewDesc_ptr + cdef cyruntime.cudaTextureDesc* cypTexDesc_ptr + cdef cyruntime.cudaResourceDesc* cypResDesc_ptr + cdef cudaTextureObject_t pTexObject + pTexObject = cudaTextureObject_t() + cypResDesc_ptr = pResDesc._pvt_ptr if pResDesc is not None else NULL + cypTexDesc_ptr = pTexDesc._pvt_ptr if pTexDesc is not None else NULL + cypResViewDesc_ptr = pResViewDesc._pvt_ptr if pResViewDesc is not None else NULL with nogil: err = cyruntime.cudaCreateTextureObject(pTexObject._pvt_ptr, cypResDesc_ptr, cypTexDesc_ptr, cypResViewDesc_ptr) if err != cyruntime.cudaSuccess: @@ -31978,6 +32286,8 @@ def cudaGetTextureObjectResourceDesc(texObject): :py:obj:`~.cudaCreateTextureObject`, :py:obj:`~.cuTexObjectGetResourceDesc` """ cdef cyruntime.cudaTextureObject_t cytexObject + cdef cudaResourceDesc pResDesc + pResDesc = cudaResourceDesc() if texObject is None: ptexObject = 0 elif isinstance(texObject, (cudaTextureObject_t,)): @@ -31985,7 +32295,6 @@ def cudaGetTextureObjectResourceDesc(texObject): else: ptexObject = int(cudaTextureObject_t(texObject)) cytexObject = ptexObject - cdef cudaResourceDesc pResDesc = cudaResourceDesc() with nogil: err = cyruntime.cudaGetTextureObjectResourceDesc(pResDesc._pvt_ptr, cytexObject) if err != cyruntime.cudaSuccess: @@ -32019,6 +32328,8 @@ def cudaGetTextureObjectTextureDesc(texObject): :py:obj:`~.cudaCreateTextureObject`, :py:obj:`~.cuTexObjectGetTextureDesc` """ cdef cyruntime.cudaTextureObject_t cytexObject + cdef cudaTextureDesc pTexDesc + pTexDesc = cudaTextureDesc() if texObject is None: ptexObject = 0 elif isinstance(texObject, (cudaTextureObject_t,)): @@ -32026,7 +32337,6 @@ def cudaGetTextureObjectTextureDesc(texObject): else: ptexObject = int(cudaTextureObject_t(texObject)) cytexObject = ptexObject - cdef cudaTextureDesc pTexDesc = cudaTextureDesc() with nogil: err = cyruntime.cudaGetTextureObjectTextureDesc(pTexDesc._pvt_ptr, cytexObject) if err != cyruntime.cudaSuccess: @@ -32061,6 +32371,8 @@ def cudaGetTextureObjectResourceViewDesc(texObject): :py:obj:`~.cudaCreateTextureObject`, :py:obj:`~.cuTexObjectGetResourceViewDesc` """ cdef cyruntime.cudaTextureObject_t cytexObject + cdef cudaResourceViewDesc pResViewDesc + pResViewDesc = cudaResourceViewDesc() if texObject is None: ptexObject = 0 elif isinstance(texObject, (cudaTextureObject_t,)): @@ -32068,7 +32380,6 @@ def cudaGetTextureObjectResourceViewDesc(texObject): else: ptexObject = int(cudaTextureObject_t(texObject)) cytexObject = ptexObject - cdef cudaResourceViewDesc pResViewDesc = cudaResourceViewDesc() with nogil: err = cyruntime.cudaGetTextureObjectResourceViewDesc(pResViewDesc._pvt_ptr, cytexObject) if err != cyruntime.cudaSuccess: @@ -32109,8 +32420,10 @@ def cudaCreateSurfaceObject(pResDesc : Optional[cudaResourceDesc]): -------- :py:obj:`~.cudaDestroySurfaceObject`, :py:obj:`~.cuSurfObjectCreate` """ - cdef cudaSurfaceObject_t pSurfObject = cudaSurfaceObject_t() - cdef cyruntime.cudaResourceDesc* cypResDesc_ptr = pResDesc._pvt_ptr if pResDesc is not None else NULL + cdef cyruntime.cudaResourceDesc* cypResDesc_ptr + cdef cudaSurfaceObject_t pSurfObject + pSurfObject = cudaSurfaceObject_t() + cypResDesc_ptr = pResDesc._pvt_ptr if pResDesc is not None else NULL with nogil: err = cyruntime.cudaCreateSurfaceObject(pSurfObject._pvt_ptr, cypResDesc_ptr) if err != cyruntime.cudaSuccess: @@ -32176,6 +32489,8 @@ def cudaGetSurfaceObjectResourceDesc(surfObject): :py:obj:`~.cudaCreateSurfaceObject`, :py:obj:`~.cuSurfObjectGetResourceDesc` """ cdef cyruntime.cudaSurfaceObject_t cysurfObject + cdef cudaResourceDesc pResDesc + pResDesc = cudaResourceDesc() if surfObject is None: psurfObject = 0 elif isinstance(surfObject, (cudaSurfaceObject_t,)): @@ -32183,7 +32498,6 @@ def cudaGetSurfaceObjectResourceDesc(surfObject): else: psurfObject = int(cudaSurfaceObject_t(surfObject)) cysurfObject = psurfObject - cdef cudaResourceDesc pResDesc = cudaResourceDesc() with nogil: err = cyruntime.cudaGetSurfaceObjectResourceDesc(pResDesc._pvt_ptr, cysurfObject) if err != cyruntime.cudaSuccess: @@ -32282,20 +32596,24 @@ def cudaLogsRegisterCallback(callbackFunc, userData): Optional location to store the callback handle after it is registered """ - cdef cyruntime.cudaLogsCallback_t cycallbackFunc - if callbackFunc is None: - pcallbackFunc = 0 - elif isinstance(callbackFunc, (cudaLogsCallback_t,)): - pcallbackFunc = int(callbackFunc) - else: - pcallbackFunc = int(cudaLogsCallback_t(callbackFunc)) - cycallbackFunc = pcallbackFunc + cdef cudaLogsCallbackHandle callback_out cdef _HelperInputVoidPtrStruct cyuserDataHelper - cdef void* cyuserData = _helper_input_void_ptr(userData, &cyuserDataHelper) - cdef cudaLogsCallbackHandle callback_out = cudaLogsCallbackHandle() - with nogil: - err = cyruntime.cudaLogsRegisterCallback(cycallbackFunc, cyuserData, callback_out._pvt_ptr) - _helper_input_void_ptr_free(&cyuserDataHelper) + cdef void* cyuserData + cdef cyruntime.cudaLogsCallback_t cycallbackFunc + try: + if callbackFunc is None: + pcallbackFunc = 0 + elif isinstance(callbackFunc, (cudaLogsCallback_t,)): + pcallbackFunc = int(callbackFunc) + else: + pcallbackFunc = int(cudaLogsCallback_t(callbackFunc)) + cycallbackFunc = pcallbackFunc + cyuserData = _helper_input_void_ptr(userData, &cyuserDataHelper) + callback_out = cudaLogsCallbackHandle() + with nogil: + err = cyruntime.cudaLogsRegisterCallback(cycallbackFunc, cyuserData, callback_out._pvt_ptr) + finally: + _helper_input_void_ptr_free(&cyuserDataHelper) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, callback_out) @@ -32348,7 +32666,8 @@ def cudaLogsCurrent(unsigned int flags): iterator_out : :py:obj:`~.cudaLogIterator` Location to store an iterator to the current tail of the logs """ - cdef cudaLogIterator iterator_out = cudaLogIterator() + cdef cudaLogIterator iterator_out + iterator_out = cudaLogIterator() with nogil: err = cyruntime.cudaLogsCurrent(iterator_out._pvt_ptr, flags) if err != cyruntime.cudaSuccess: @@ -32482,7 +32801,8 @@ def cudaGraphCreate(unsigned int flags): -------- :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`, :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphDestroy`, :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphClone` """ - cdef cudaGraph_t pGraph = cudaGraph_t() + cdef cudaGraph_t pGraph + pGraph = cudaGraph_t() with nogil: err = cyruntime.cudaGraphCreate(pGraph._pvt_ptr, flags) if err != cyruntime.cudaSuccess: @@ -32579,34 +32899,38 @@ def cudaGraphAddKernelNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t ----- Kernels launched using graphs must not use texture and surface references. Reading or writing through any texture or surface reference is undefined behavior. This restriction does not apply to texture and surface objects. """ - pDependencies = [] if pDependencies is None else pDependencies - if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): - raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") - cdef cyruntime.cudaGraph_t cygraph - if graph is None: - pgraph = 0 - elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): - pgraph = int(graph) - else: - pgraph = int(cudaGraph_t(graph)) - cygraph = pgraph - cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() + cdef cyruntime.cudaKernelNodeParams* cypNodeParams_ptr cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 1: - cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) - if cypDependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - else: - for idx in range(len(pDependencies)): - cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] - elif len(pDependencies) == 1: - cypDependencies = (pDependencies[0])._pvt_ptr - if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) - cdef cyruntime.cudaKernelNodeParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL - with nogil: - err = cyruntime.cudaGraphAddKernelNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cypNodeParams_ptr) - if len(pDependencies) > 1 and cypDependencies is not NULL: - free(cypDependencies) + cdef cyruntime.cudaGraph_t cygraph + cdef cudaGraphNode_t pGraphNode + try: + pGraphNode = cudaGraphNode_t() + if graph is None: + pgraph = 0 + elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): + pgraph = int(graph) + else: + pgraph = int(cudaGraph_t(graph)) + cygraph = pgraph + pDependencies = [] if pDependencies is None else pDependencies + if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): + raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") + if len(pDependencies) > 1: + cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) + if cypDependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + else: + for idx in range(len(pDependencies)): + cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr + if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) + cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL + with nogil: + err = cyruntime.cudaGraphAddKernelNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cypNodeParams_ptr) + finally: + if len(pDependencies) > 1 and cypDependencies is not NULL: + free(cypDependencies) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, pGraphNode) @@ -32645,6 +32969,7 @@ def cudaGraphKernelNodeGetParams(node): -------- :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphKernelNodeSetParams` """ + cdef cudaKernelNodeParams pNodeParams cdef cyruntime.cudaGraphNode_t cynode if node is None: pnode = 0 @@ -32653,7 +32978,7 @@ def cudaGraphKernelNodeGetParams(node): else: pnode = int(cudaGraphNode_t(node)) cynode = pnode - cdef cudaKernelNodeParams pNodeParams = cudaKernelNodeParams() + pNodeParams = cudaKernelNodeParams() with nogil: err = cyruntime.cudaGraphKernelNodeGetParams(cynode, pNodeParams._pvt_ptr) if err != cyruntime.cudaSuccess: @@ -32685,6 +33010,7 @@ def cudaGraphKernelNodeSetParams(node, pNodeParams : Optional[cudaKernelNodePara -------- :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphKernelNodeGetParams` """ + cdef cyruntime.cudaKernelNodeParams* cypNodeParams_ptr cdef cyruntime.cudaGraphNode_t cynode if node is None: pnode = 0 @@ -32693,7 +33019,7 @@ def cudaGraphKernelNodeSetParams(node, pNodeParams : Optional[cudaKernelNodePara else: pnode = int(cudaGraphNode_t(node)) cynode = pnode - cdef cyruntime.cudaKernelNodeParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL + cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL with nogil: err = cyruntime.cudaGraphKernelNodeSetParams(cynode, cypNodeParams_ptr) return (_cudaError_t(err),) @@ -32726,13 +33052,6 @@ def cudaGraphKernelNodeCopyAttributes(hDst, hSrc): :py:obj:`~.cudaAccessPolicyWindow` """ cdef cyruntime.cudaGraphNode_t cyhSrc - if hSrc is None: - phSrc = 0 - elif isinstance(hSrc, (cudaGraphNode_t,driver.CUgraphNode)): - phSrc = int(hSrc) - else: - phSrc = int(cudaGraphNode_t(hSrc)) - cyhSrc = phSrc cdef cyruntime.cudaGraphNode_t cyhDst if hDst is None: phDst = 0 @@ -32741,6 +33060,13 @@ def cudaGraphKernelNodeCopyAttributes(hDst, hSrc): else: phDst = int(cudaGraphNode_t(hDst)) cyhDst = phDst + if hSrc is None: + phSrc = 0 + elif isinstance(hSrc, (cudaGraphNode_t,driver.CUgraphNode)): + phSrc = int(hSrc) + else: + phSrc = int(cudaGraphNode_t(hSrc)) + cyhSrc = phSrc with nogil: err = cyruntime.cudaGraphKernelNodeCopyAttributes(cyhDst, cyhSrc) return (_cudaError_t(err),) @@ -32773,6 +33099,8 @@ def cudaGraphKernelNodeGetAttribute(hNode, attr not None : cudaKernelNodeAttrID) -------- :py:obj:`~.cudaAccessPolicyWindow` """ + cdef cudaKernelNodeAttrValue value_out + cdef cyruntime.cudaKernelNodeAttrID cyattr cdef cyruntime.cudaGraphNode_t cyhNode if hNode is None: phNode = 0 @@ -32781,8 +33109,8 @@ def cudaGraphKernelNodeGetAttribute(hNode, attr not None : cudaKernelNodeAttrID) else: phNode = int(cudaGraphNode_t(hNode)) cyhNode = phNode - cdef cyruntime.cudaKernelNodeAttrID cyattr = int(attr) - cdef cudaKernelNodeAttrValue value_out = cudaKernelNodeAttrValue() + cyattr = int(attr) + value_out = cudaKernelNodeAttrValue() with nogil: err = cyruntime.cudaGraphKernelNodeGetAttribute(cyhNode, cyattr, value_out._pvt_ptr) if err != cyruntime.cudaSuccess: @@ -32817,6 +33145,8 @@ def cudaGraphKernelNodeSetAttribute(hNode, attr not None : cudaKernelNodeAttrID, -------- :py:obj:`~.cudaAccessPolicyWindow` """ + cdef cyruntime.cudaKernelNodeAttrValue* cyvalue_ptr + cdef cyruntime.cudaKernelNodeAttrID cyattr cdef cyruntime.cudaGraphNode_t cyhNode if hNode is None: phNode = 0 @@ -32825,8 +33155,8 @@ def cudaGraphKernelNodeSetAttribute(hNode, attr not None : cudaKernelNodeAttrID, else: phNode = int(cudaGraphNode_t(hNode)) cyhNode = phNode - cdef cyruntime.cudaKernelNodeAttrID cyattr = int(attr) - cdef cyruntime.cudaKernelNodeAttrValue* cyvalue_ptr = value._pvt_ptr if value is not None else NULL + cyattr = int(attr) + cyvalue_ptr = value._pvt_ptr if value is not None else NULL with nogil: err = cyruntime.cudaGraphKernelNodeSetAttribute(cyhNode, cyattr, cyvalue_ptr) return (_cudaError_t(err),) @@ -32875,34 +33205,38 @@ def cudaGraphAddMemcpyNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t -------- :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaMemcpy3D`, :py:obj:`~.cudaGraphAddMemcpyNodeToSymbol`, :py:obj:`~.cudaGraphAddMemcpyNodeFromSymbol`, :py:obj:`~.cudaGraphAddMemcpyNode1D`, :py:obj:`~.cudaGraphMemcpyNodeGetParams`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemsetNode` """ - pDependencies = [] if pDependencies is None else pDependencies - if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): - raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") - cdef cyruntime.cudaGraph_t cygraph - if graph is None: - pgraph = 0 - elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): - pgraph = int(graph) - else: - pgraph = int(cudaGraph_t(graph)) - cygraph = pgraph - cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() + cdef cyruntime.cudaMemcpy3DParms* cypCopyParams_ptr cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 1: - cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) - if cypDependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - else: - for idx in range(len(pDependencies)): - cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] - elif len(pDependencies) == 1: - cypDependencies = (pDependencies[0])._pvt_ptr - if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) - cdef cyruntime.cudaMemcpy3DParms* cypCopyParams_ptr = pCopyParams._pvt_ptr if pCopyParams is not None else NULL - with nogil: - err = cyruntime.cudaGraphAddMemcpyNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cypCopyParams_ptr) - if len(pDependencies) > 1 and cypDependencies is not NULL: - free(cypDependencies) + cdef cyruntime.cudaGraph_t cygraph + cdef cudaGraphNode_t pGraphNode + try: + pGraphNode = cudaGraphNode_t() + if graph is None: + pgraph = 0 + elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): + pgraph = int(graph) + else: + pgraph = int(cudaGraph_t(graph)) + cygraph = pgraph + pDependencies = [] if pDependencies is None else pDependencies + if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): + raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") + if len(pDependencies) > 1: + cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) + if cypDependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + else: + for idx in range(len(pDependencies)): + cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr + if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) + cypCopyParams_ptr = pCopyParams._pvt_ptr if pCopyParams is not None else NULL + with nogil: + err = cyruntime.cudaGraphAddMemcpyNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cypCopyParams_ptr) + finally: + if len(pDependencies) > 1 and cypDependencies is not NULL: + free(cypDependencies) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, pGraphNode) @@ -32967,39 +33301,45 @@ def cudaGraphAddMemcpyNode1D(graph, pDependencies : Optional[tuple[cudaGraphNode -------- :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphMemcpyNodeGetParams`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`, :py:obj:`~.cudaGraphMemcpyNodeSetParams1D`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemsetNode` """ - pDependencies = [] if pDependencies is None else pDependencies - if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): - raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") - cdef cyruntime.cudaGraph_t cygraph - if graph is None: - pgraph = 0 - elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): - pgraph = int(graph) - else: - pgraph = int(cudaGraph_t(graph)) - cygraph = pgraph - cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() - cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 1: - cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) - if cypDependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - else: - for idx in range(len(pDependencies)): - cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] - elif len(pDependencies) == 1: - cypDependencies = (pDependencies[0])._pvt_ptr - cdef _HelperInputVoidPtrStruct cydstHelper - cdef void* cydst = _helper_input_void_ptr(dst, &cydstHelper) + cdef cyruntime.cudaMemcpyKind cykind cdef _HelperInputVoidPtrStruct cysrcHelper - cdef void* cysrc = _helper_input_void_ptr(src, &cysrcHelper) - cdef cyruntime.cudaMemcpyKind cykind = int(kind) - with nogil: - err = cyruntime.cudaGraphAddMemcpyNode1D(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cydst, cysrc, count, cykind) - if len(pDependencies) > 1 and cypDependencies is not NULL: - free(cypDependencies) - _helper_input_void_ptr_free(&cydstHelper) - _helper_input_void_ptr_free(&cysrcHelper) + cdef void* cysrc + cdef _HelperInputVoidPtrStruct cydstHelper + cdef void* cydst + cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL + cdef cyruntime.cudaGraph_t cygraph + cdef cudaGraphNode_t pGraphNode + try: + pGraphNode = cudaGraphNode_t() + if graph is None: + pgraph = 0 + elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): + pgraph = int(graph) + else: + pgraph = int(cudaGraph_t(graph)) + cygraph = pgraph + pDependencies = [] if pDependencies is None else pDependencies + if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): + raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") + if len(pDependencies) > 1: + cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) + if cypDependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + else: + for idx in range(len(pDependencies)): + cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr + cydst = _helper_input_void_ptr(dst, &cydstHelper) + cysrc = _helper_input_void_ptr(src, &cysrcHelper) + cykind = int(kind) + with nogil: + err = cyruntime.cudaGraphAddMemcpyNode1D(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cydst, cysrc, count, cykind) + finally: + if len(pDependencies) > 1 and cypDependencies is not NULL: + free(cypDependencies) + _helper_input_void_ptr_free(&cydstHelper) + _helper_input_void_ptr_free(&cysrcHelper) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, pGraphNode) @@ -33029,6 +33369,7 @@ def cudaGraphMemcpyNodeGetParams(node): -------- :py:obj:`~.cudaMemcpy3D`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphMemcpyNodeSetParams` """ + cdef cudaMemcpy3DParms pNodeParams cdef cyruntime.cudaGraphNode_t cynode if node is None: pnode = 0 @@ -33037,7 +33378,7 @@ def cudaGraphMemcpyNodeGetParams(node): else: pnode = int(cudaGraphNode_t(node)) cynode = pnode - cdef cudaMemcpy3DParms pNodeParams = cudaMemcpy3DParms() + pNodeParams = cudaMemcpy3DParms() with nogil: err = cyruntime.cudaGraphMemcpyNodeGetParams(cynode, pNodeParams._pvt_ptr) if err != cyruntime.cudaSuccess: @@ -33069,6 +33410,7 @@ def cudaGraphMemcpyNodeSetParams(node, pNodeParams : Optional[cudaMemcpy3DParms] -------- :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaMemcpy3D`, :py:obj:`~.cudaGraphMemcpyNodeSetParamsToSymbol`, :py:obj:`~.cudaGraphMemcpyNodeSetParamsFromSymbol`, :py:obj:`~.cudaGraphMemcpyNodeSetParams1D`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphMemcpyNodeGetParams` """ + cdef cyruntime.cudaMemcpy3DParms* cypNodeParams_ptr cdef cyruntime.cudaGraphNode_t cynode if node is None: pnode = 0 @@ -33077,7 +33419,7 @@ def cudaGraphMemcpyNodeSetParams(node, pNodeParams : Optional[cudaMemcpy3DParms] else: pnode = int(cudaGraphNode_t(node)) cynode = pnode - cdef cyruntime.cudaMemcpy3DParms* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL + cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL with nogil: err = cyruntime.cudaGraphMemcpyNodeSetParams(cynode, cypNodeParams_ptr) return (_cudaError_t(err),) @@ -33127,23 +33469,28 @@ def cudaGraphMemcpyNodeSetParams1D(node, dst, src, size_t count, kind not None : -------- :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphMemcpyNodeGetParams` """ - cdef cyruntime.cudaGraphNode_t cynode - if node is None: - pnode = 0 - elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)): - pnode = int(node) - else: - pnode = int(cudaGraphNode_t(node)) - cynode = pnode - cdef _HelperInputVoidPtrStruct cydstHelper - cdef void* cydst = _helper_input_void_ptr(dst, &cydstHelper) + cdef cyruntime.cudaMemcpyKind cykind cdef _HelperInputVoidPtrStruct cysrcHelper - cdef void* cysrc = _helper_input_void_ptr(src, &cysrcHelper) - cdef cyruntime.cudaMemcpyKind cykind = int(kind) - with nogil: - err = cyruntime.cudaGraphMemcpyNodeSetParams1D(cynode, cydst, cysrc, count, cykind) - _helper_input_void_ptr_free(&cydstHelper) - _helper_input_void_ptr_free(&cysrcHelper) + cdef void* cysrc + cdef _HelperInputVoidPtrStruct cydstHelper + cdef void* cydst + cdef cyruntime.cudaGraphNode_t cynode + try: + if node is None: + pnode = 0 + elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)): + pnode = int(node) + else: + pnode = int(cudaGraphNode_t(node)) + cynode = pnode + cydst = _helper_input_void_ptr(dst, &cydstHelper) + cysrc = _helper_input_void_ptr(src, &cysrcHelper) + cykind = int(kind) + with nogil: + err = cyruntime.cudaGraphMemcpyNodeSetParams1D(cynode, cydst, cysrc, count, cykind) + finally: + _helper_input_void_ptr_free(&cydstHelper) + _helper_input_void_ptr_free(&cysrcHelper) return (_cudaError_t(err),) {{endif}} @@ -33184,34 +33531,38 @@ def cudaGraphAddMemsetNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t -------- :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaGraphMemsetNodeGetParams`, :py:obj:`~.cudaGraphMemsetNodeSetParams`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemcpyNode` """ - pDependencies = [] if pDependencies is None else pDependencies - if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): - raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") - cdef cyruntime.cudaGraph_t cygraph - if graph is None: - pgraph = 0 - elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): - pgraph = int(graph) - else: - pgraph = int(cudaGraph_t(graph)) - cygraph = pgraph - cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() + cdef cyruntime.cudaMemsetParams* cypMemsetParams_ptr cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 1: - cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) - if cypDependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - else: - for idx in range(len(pDependencies)): - cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] - elif len(pDependencies) == 1: - cypDependencies = (pDependencies[0])._pvt_ptr - if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) - cdef cyruntime.cudaMemsetParams* cypMemsetParams_ptr = pMemsetParams._pvt_ptr if pMemsetParams is not None else NULL - with nogil: - err = cyruntime.cudaGraphAddMemsetNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cypMemsetParams_ptr) - if len(pDependencies) > 1 and cypDependencies is not NULL: - free(cypDependencies) + cdef cyruntime.cudaGraph_t cygraph + cdef cudaGraphNode_t pGraphNode + try: + pGraphNode = cudaGraphNode_t() + if graph is None: + pgraph = 0 + elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): + pgraph = int(graph) + else: + pgraph = int(cudaGraph_t(graph)) + cygraph = pgraph + pDependencies = [] if pDependencies is None else pDependencies + if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): + raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") + if len(pDependencies) > 1: + cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) + if cypDependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + else: + for idx in range(len(pDependencies)): + cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr + if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) + cypMemsetParams_ptr = pMemsetParams._pvt_ptr if pMemsetParams is not None else NULL + with nogil: + err = cyruntime.cudaGraphAddMemsetNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cypMemsetParams_ptr) + finally: + if len(pDependencies) > 1 and cypDependencies is not NULL: + free(cypDependencies) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, pGraphNode) @@ -33241,6 +33592,7 @@ def cudaGraphMemsetNodeGetParams(node): -------- :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaGraphAddMemsetNode`, :py:obj:`~.cudaGraphMemsetNodeSetParams` """ + cdef cudaMemsetParams pNodeParams cdef cyruntime.cudaGraphNode_t cynode if node is None: pnode = 0 @@ -33249,7 +33601,7 @@ def cudaGraphMemsetNodeGetParams(node): else: pnode = int(cudaGraphNode_t(node)) cynode = pnode - cdef cudaMemsetParams pNodeParams = cudaMemsetParams() + pNodeParams = cudaMemsetParams() with nogil: err = cyruntime.cudaGraphMemsetNodeGetParams(cynode, pNodeParams._pvt_ptr) if err != cyruntime.cudaSuccess: @@ -33281,6 +33633,7 @@ def cudaGraphMemsetNodeSetParams(node, pNodeParams : Optional[cudaMemsetParams]) -------- :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaGraphAddMemsetNode`, :py:obj:`~.cudaGraphMemsetNodeGetParams` """ + cdef cyruntime.cudaMemsetParams* cypNodeParams_ptr cdef cyruntime.cudaGraphNode_t cynode if node is None: pnode = 0 @@ -33289,7 +33642,7 @@ def cudaGraphMemsetNodeSetParams(node, pNodeParams : Optional[cudaMemsetParams]) else: pnode = int(cudaGraphNode_t(node)) cynode = pnode - cdef cyruntime.cudaMemsetParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL + cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL with nogil: err = cyruntime.cudaGraphMemsetNodeSetParams(cynode, cypNodeParams_ptr) return (_cudaError_t(err),) @@ -33333,34 +33686,38 @@ def cudaGraphAddHostNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] -------- :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaLaunchHostFunc`, :py:obj:`~.cudaGraphHostNodeGetParams`, :py:obj:`~.cudaGraphHostNodeSetParams`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode` """ - pDependencies = [] if pDependencies is None else pDependencies - if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): - raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") - cdef cyruntime.cudaGraph_t cygraph - if graph is None: - pgraph = 0 - elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): - pgraph = int(graph) - else: - pgraph = int(cudaGraph_t(graph)) - cygraph = pgraph - cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() + cdef cyruntime.cudaHostNodeParams* cypNodeParams_ptr cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 1: - cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) - if cypDependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - else: - for idx in range(len(pDependencies)): - cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] - elif len(pDependencies) == 1: - cypDependencies = (pDependencies[0])._pvt_ptr - if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) - cdef cyruntime.cudaHostNodeParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL - with nogil: - err = cyruntime.cudaGraphAddHostNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cypNodeParams_ptr) - if len(pDependencies) > 1 and cypDependencies is not NULL: - free(cypDependencies) + cdef cyruntime.cudaGraph_t cygraph + cdef cudaGraphNode_t pGraphNode + try: + pGraphNode = cudaGraphNode_t() + if graph is None: + pgraph = 0 + elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): + pgraph = int(graph) + else: + pgraph = int(cudaGraph_t(graph)) + cygraph = pgraph + pDependencies = [] if pDependencies is None else pDependencies + if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): + raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") + if len(pDependencies) > 1: + cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) + if cypDependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + else: + for idx in range(len(pDependencies)): + cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr + if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) + cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL + with nogil: + err = cyruntime.cudaGraphAddHostNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cypNodeParams_ptr) + finally: + if len(pDependencies) > 1 and cypDependencies is not NULL: + free(cypDependencies) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, pGraphNode) @@ -33390,6 +33747,7 @@ def cudaGraphHostNodeGetParams(node): -------- :py:obj:`~.cudaLaunchHostFunc`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphHostNodeSetParams` """ + cdef cudaHostNodeParams pNodeParams cdef cyruntime.cudaGraphNode_t cynode if node is None: pnode = 0 @@ -33398,7 +33756,7 @@ def cudaGraphHostNodeGetParams(node): else: pnode = int(cudaGraphNode_t(node)) cynode = pnode - cdef cudaHostNodeParams pNodeParams = cudaHostNodeParams() + pNodeParams = cudaHostNodeParams() with nogil: err = cyruntime.cudaGraphHostNodeGetParams(cynode, pNodeParams._pvt_ptr) if err != cyruntime.cudaSuccess: @@ -33430,6 +33788,7 @@ def cudaGraphHostNodeSetParams(node, pNodeParams : Optional[cudaHostNodeParams]) -------- :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaLaunchHostFunc`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphHostNodeGetParams` """ + cdef cyruntime.cudaHostNodeParams* cypNodeParams_ptr cdef cyruntime.cudaGraphNode_t cynode if node is None: pnode = 0 @@ -33438,7 +33797,7 @@ def cudaGraphHostNodeSetParams(node, pNodeParams : Optional[cudaHostNodeParams]) else: pnode = int(cudaGraphNode_t(node)) cynode = pnode - cdef cyruntime.cudaHostNodeParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL + cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL with nogil: err = cyruntime.cudaGraphHostNodeSetParams(cynode, cypNodeParams_ptr) return (_cudaError_t(err),) @@ -33486,40 +33845,43 @@ def cudaGraphAddChildGraphNode(graph, pDependencies : Optional[tuple[cudaGraphNo :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphChildGraphNodeGetGraph`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`, :py:obj:`~.cudaGraphClone` """ cdef cyruntime.cudaGraph_t cychildGraph - if childGraph is None: - pchildGraph = 0 - elif isinstance(childGraph, (cudaGraph_t,driver.CUgraph)): - pchildGraph = int(childGraph) - else: - pchildGraph = int(cudaGraph_t(childGraph)) - cychildGraph = pchildGraph - pDependencies = [] if pDependencies is None else pDependencies - if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): - raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") - cdef cyruntime.cudaGraph_t cygraph - if graph is None: - pgraph = 0 - elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): - pgraph = int(graph) - else: - pgraph = int(cudaGraph_t(graph)) - cygraph = pgraph - cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 1: - cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) - if cypDependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - else: - for idx in range(len(pDependencies)): - cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] - elif len(pDependencies) == 1: - cypDependencies = (pDependencies[0])._pvt_ptr - if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) - with nogil: - err = cyruntime.cudaGraphAddChildGraphNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cychildGraph) - if len(pDependencies) > 1 and cypDependencies is not NULL: - free(cypDependencies) + cdef cyruntime.cudaGraph_t cygraph + cdef cudaGraphNode_t pGraphNode + try: + pGraphNode = cudaGraphNode_t() + if graph is None: + pgraph = 0 + elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): + pgraph = int(graph) + else: + pgraph = int(cudaGraph_t(graph)) + cygraph = pgraph + pDependencies = [] if pDependencies is None else pDependencies + if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): + raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") + if len(pDependencies) > 1: + cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) + if cypDependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + else: + for idx in range(len(pDependencies)): + cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr + if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) + if childGraph is None: + pchildGraph = 0 + elif isinstance(childGraph, (cudaGraph_t,driver.CUgraph)): + pchildGraph = int(childGraph) + else: + pchildGraph = int(cudaGraph_t(childGraph)) + cychildGraph = pchildGraph + with nogil: + err = cyruntime.cudaGraphAddChildGraphNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cychildGraph) + finally: + if len(pDependencies) > 1 and cypDependencies is not NULL: + free(cypDependencies) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, pGraphNode) @@ -33554,6 +33916,7 @@ def cudaGraphChildGraphNodeGetGraph(node): -------- :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphNodeFindInClone` """ + cdef cudaGraph_t pGraph cdef cyruntime.cudaGraphNode_t cynode if node is None: pnode = 0 @@ -33562,7 +33925,7 @@ def cudaGraphChildGraphNodeGetGraph(node): else: pnode = int(cudaGraphNode_t(node)) cynode = pnode - cdef cudaGraph_t pGraph = cudaGraph_t() + pGraph = cudaGraph_t() with nogil: err = cyruntime.cudaGraphChildGraphNodeGetGraph(cynode, pGraph._pvt_ptr) if err != cyruntime.cudaSuccess: @@ -33609,33 +33972,36 @@ def cudaGraphAddEmptyNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] -------- :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode` """ - pDependencies = [] if pDependencies is None else pDependencies - if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): - raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") - cdef cyruntime.cudaGraph_t cygraph - if graph is None: - pgraph = 0 - elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): - pgraph = int(graph) - else: - pgraph = int(cudaGraph_t(graph)) - cygraph = pgraph - cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 1: - cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) - if cypDependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - else: - for idx in range(len(pDependencies)): - cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] - elif len(pDependencies) == 1: - cypDependencies = (pDependencies[0])._pvt_ptr - if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) - with nogil: - err = cyruntime.cudaGraphAddEmptyNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies) - if len(pDependencies) > 1 and cypDependencies is not NULL: - free(cypDependencies) + cdef cyruntime.cudaGraph_t cygraph + cdef cudaGraphNode_t pGraphNode + try: + pGraphNode = cudaGraphNode_t() + if graph is None: + pgraph = 0 + elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): + pgraph = int(graph) + else: + pgraph = int(cudaGraph_t(graph)) + cygraph = pgraph + pDependencies = [] if pDependencies is None else pDependencies + if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): + raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") + if len(pDependencies) > 1: + cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) + if cypDependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + else: + for idx in range(len(pDependencies)): + cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr + if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) + with nogil: + err = cyruntime.cudaGraphAddEmptyNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies) + finally: + if len(pDependencies) > 1 and cypDependencies is not NULL: + free(cypDependencies) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, pGraphNode) @@ -33682,40 +34048,43 @@ def cudaGraphAddEventRecordNode(graph, pDependencies : Optional[tuple[cudaGraphN :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode` """ cdef cyruntime.cudaEvent_t cyevent - if event is None: - pevent = 0 - elif isinstance(event, (cudaEvent_t,driver.CUevent)): - pevent = int(event) - else: - pevent = int(cudaEvent_t(event)) - cyevent = pevent - pDependencies = [] if pDependencies is None else pDependencies - if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): - raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") - cdef cyruntime.cudaGraph_t cygraph - if graph is None: - pgraph = 0 - elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): - pgraph = int(graph) - else: - pgraph = int(cudaGraph_t(graph)) - cygraph = pgraph - cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 1: - cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) - if cypDependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - else: - for idx in range(len(pDependencies)): - cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] - elif len(pDependencies) == 1: - cypDependencies = (pDependencies[0])._pvt_ptr - if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) - with nogil: - err = cyruntime.cudaGraphAddEventRecordNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cyevent) - if len(pDependencies) > 1 and cypDependencies is not NULL: - free(cypDependencies) + cdef cyruntime.cudaGraph_t cygraph + cdef cudaGraphNode_t pGraphNode + try: + pGraphNode = cudaGraphNode_t() + if graph is None: + pgraph = 0 + elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): + pgraph = int(graph) + else: + pgraph = int(cudaGraph_t(graph)) + cygraph = pgraph + pDependencies = [] if pDependencies is None else pDependencies + if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): + raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") + if len(pDependencies) > 1: + cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) + if cypDependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + else: + for idx in range(len(pDependencies)): + cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr + if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) + if event is None: + pevent = 0 + elif isinstance(event, (cudaEvent_t,driver.CUevent)): + pevent = int(event) + else: + pevent = int(cudaEvent_t(event)) + cyevent = pevent + with nogil: + err = cyruntime.cudaGraphAddEventRecordNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cyevent) + finally: + if len(pDependencies) > 1 and cypDependencies is not NULL: + free(cypDependencies) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, pGraphNode) @@ -33745,6 +34114,7 @@ def cudaGraphEventRecordNodeGetEvent(node): -------- :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphEventWaitNodeGetEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent` """ + cdef cudaEvent_t event_out cdef cyruntime.cudaGraphNode_t cynode if node is None: pnode = 0 @@ -33753,7 +34123,7 @@ def cudaGraphEventRecordNodeGetEvent(node): else: pnode = int(cudaGraphNode_t(node)) cynode = pnode - cdef cudaEvent_t event_out = cudaEvent_t() + event_out = cudaEvent_t() with nogil: err = cyruntime.cudaGraphEventRecordNodeGetEvent(cynode, event_out._pvt_ptr) if err != cyruntime.cudaSuccess: @@ -33786,13 +34156,6 @@ def cudaGraphEventRecordNodeSetEvent(node, event): :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphEventRecordNodeGetEvent`, :py:obj:`~.cudaGraphEventWaitNodeSetEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent` """ cdef cyruntime.cudaEvent_t cyevent - if event is None: - pevent = 0 - elif isinstance(event, (cudaEvent_t,driver.CUevent)): - pevent = int(event) - else: - pevent = int(cudaEvent_t(event)) - cyevent = pevent cdef cyruntime.cudaGraphNode_t cynode if node is None: pnode = 0 @@ -33801,6 +34164,13 @@ def cudaGraphEventRecordNodeSetEvent(node, event): else: pnode = int(cudaGraphNode_t(node)) cynode = pnode + if event is None: + pevent = 0 + elif isinstance(event, (cudaEvent_t,driver.CUevent)): + pevent = int(event) + else: + pevent = int(cudaEvent_t(event)) + cyevent = pevent with nogil: err = cyruntime.cudaGraphEventRecordNodeSetEvent(cynode, cyevent) return (_cudaError_t(err),) @@ -33850,40 +34220,43 @@ def cudaGraphAddEventWaitNode(graph, pDependencies : Optional[tuple[cudaGraphNod :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode` """ cdef cyruntime.cudaEvent_t cyevent - if event is None: - pevent = 0 - elif isinstance(event, (cudaEvent_t,driver.CUevent)): - pevent = int(event) - else: - pevent = int(cudaEvent_t(event)) - cyevent = pevent - pDependencies = [] if pDependencies is None else pDependencies - if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): - raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") - cdef cyruntime.cudaGraph_t cygraph - if graph is None: - pgraph = 0 - elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): - pgraph = int(graph) - else: - pgraph = int(cudaGraph_t(graph)) - cygraph = pgraph - cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 1: - cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) - if cypDependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - else: - for idx in range(len(pDependencies)): - cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] - elif len(pDependencies) == 1: - cypDependencies = (pDependencies[0])._pvt_ptr - if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) - with nogil: - err = cyruntime.cudaGraphAddEventWaitNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cyevent) - if len(pDependencies) > 1 and cypDependencies is not NULL: - free(cypDependencies) + cdef cyruntime.cudaGraph_t cygraph + cdef cudaGraphNode_t pGraphNode + try: + pGraphNode = cudaGraphNode_t() + if graph is None: + pgraph = 0 + elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): + pgraph = int(graph) + else: + pgraph = int(cudaGraph_t(graph)) + cygraph = pgraph + pDependencies = [] if pDependencies is None else pDependencies + if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): + raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") + if len(pDependencies) > 1: + cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) + if cypDependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + else: + for idx in range(len(pDependencies)): + cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr + if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) + if event is None: + pevent = 0 + elif isinstance(event, (cudaEvent_t,driver.CUevent)): + pevent = int(event) + else: + pevent = int(cudaEvent_t(event)) + cyevent = pevent + with nogil: + err = cyruntime.cudaGraphAddEventWaitNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cyevent) + finally: + if len(pDependencies) > 1 and cypDependencies is not NULL: + free(cypDependencies) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, pGraphNode) @@ -33913,6 +34286,7 @@ def cudaGraphEventWaitNodeGetEvent(node): -------- :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphEventRecordNodeGetEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent` """ + cdef cudaEvent_t event_out cdef cyruntime.cudaGraphNode_t cynode if node is None: pnode = 0 @@ -33921,7 +34295,7 @@ def cudaGraphEventWaitNodeGetEvent(node): else: pnode = int(cudaGraphNode_t(node)) cynode = pnode - cdef cudaEvent_t event_out = cudaEvent_t() + event_out = cudaEvent_t() with nogil: err = cyruntime.cudaGraphEventWaitNodeGetEvent(cynode, event_out._pvt_ptr) if err != cyruntime.cudaSuccess: @@ -33954,13 +34328,6 @@ def cudaGraphEventWaitNodeSetEvent(node, event): :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphEventWaitNodeGetEvent`, :py:obj:`~.cudaGraphEventRecordNodeSetEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent` """ cdef cyruntime.cudaEvent_t cyevent - if event is None: - pevent = 0 - elif isinstance(event, (cudaEvent_t,driver.CUevent)): - pevent = int(event) - else: - pevent = int(cudaEvent_t(event)) - cyevent = pevent cdef cyruntime.cudaGraphNode_t cynode if node is None: pnode = 0 @@ -33969,6 +34336,13 @@ def cudaGraphEventWaitNodeSetEvent(node, event): else: pnode = int(cudaGraphNode_t(node)) cynode = pnode + if event is None: + pevent = 0 + elif isinstance(event, (cudaEvent_t,driver.CUevent)): + pevent = int(event) + else: + pevent = int(cudaEvent_t(event)) + cyevent = pevent with nogil: err = cyruntime.cudaGraphEventWaitNodeSetEvent(cynode, cyevent) return (_cudaError_t(err),) @@ -34013,34 +34387,38 @@ def cudaGraphAddExternalSemaphoresSignalNode(graph, pDependencies : Optional[tup -------- :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphExternalSemaphoresSignalNodeGetParams`, :py:obj:`~.cudaGraphExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode` """ - pDependencies = [] if pDependencies is None else pDependencies - if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): - raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") - cdef cyruntime.cudaGraph_t cygraph - if graph is None: - pgraph = 0 - elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): - pgraph = int(graph) - else: - pgraph = int(cudaGraph_t(graph)) - cygraph = pgraph - cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() + cdef cyruntime.cudaExternalSemaphoreSignalNodeParams* cynodeParams_ptr cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 1: - cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) - if cypDependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - else: - for idx in range(len(pDependencies)): - cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] - elif len(pDependencies) == 1: - cypDependencies = (pDependencies[0])._pvt_ptr - if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) - cdef cyruntime.cudaExternalSemaphoreSignalNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL - with nogil: - err = cyruntime.cudaGraphAddExternalSemaphoresSignalNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cynodeParams_ptr) - if len(pDependencies) > 1 and cypDependencies is not NULL: - free(cypDependencies) + cdef cyruntime.cudaGraph_t cygraph + cdef cudaGraphNode_t pGraphNode + try: + pGraphNode = cudaGraphNode_t() + if graph is None: + pgraph = 0 + elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): + pgraph = int(graph) + else: + pgraph = int(cudaGraph_t(graph)) + cygraph = pgraph + pDependencies = [] if pDependencies is None else pDependencies + if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): + raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") + if len(pDependencies) > 1: + cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) + if cypDependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + else: + for idx in range(len(pDependencies)): + cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr + if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + with nogil: + err = cyruntime.cudaGraphAddExternalSemaphoresSignalNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cynodeParams_ptr) + finally: + if len(pDependencies) > 1 and cypDependencies is not NULL: + free(cypDependencies) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, pGraphNode) @@ -34076,6 +34454,7 @@ def cudaGraphExternalSemaphoresSignalNodeGetParams(hNode): -------- :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaGraphExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync` """ + cdef cudaExternalSemaphoreSignalNodeParams params_out cdef cyruntime.cudaGraphNode_t cyhNode if hNode is None: phNode = 0 @@ -34084,7 +34463,7 @@ def cudaGraphExternalSemaphoresSignalNodeGetParams(hNode): else: phNode = int(cudaGraphNode_t(hNode)) cyhNode = phNode - cdef cudaExternalSemaphoreSignalNodeParams params_out = cudaExternalSemaphoreSignalNodeParams() + params_out = cudaExternalSemaphoreSignalNodeParams() with nogil: err = cyruntime.cudaGraphExternalSemaphoresSignalNodeGetParams(cyhNode, params_out._pvt_ptr) if err != cyruntime.cudaSuccess: @@ -34117,6 +34496,7 @@ def cudaGraphExternalSemaphoresSignalNodeSetParams(hNode, nodeParams : Optional[ -------- :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaGraphExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync` """ + cdef cyruntime.cudaExternalSemaphoreSignalNodeParams* cynodeParams_ptr cdef cyruntime.cudaGraphNode_t cyhNode if hNode is None: phNode = 0 @@ -34125,7 +34505,7 @@ def cudaGraphExternalSemaphoresSignalNodeSetParams(hNode, nodeParams : Optional[ else: phNode = int(cudaGraphNode_t(hNode)) cyhNode = phNode - cdef cyruntime.cudaExternalSemaphoreSignalNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL with nogil: err = cyruntime.cudaGraphExternalSemaphoresSignalNodeSetParams(cyhNode, cynodeParams_ptr) return (_cudaError_t(err),) @@ -34170,34 +34550,38 @@ def cudaGraphAddExternalSemaphoresWaitNode(graph, pDependencies : Optional[tuple -------- :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphExternalSemaphoresWaitNodeGetParams`, :py:obj:`~.cudaGraphExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode` """ - pDependencies = [] if pDependencies is None else pDependencies - if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): - raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") - cdef cyruntime.cudaGraph_t cygraph - if graph is None: - pgraph = 0 - elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): - pgraph = int(graph) - else: - pgraph = int(cudaGraph_t(graph)) - cygraph = pgraph - cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() + cdef cyruntime.cudaExternalSemaphoreWaitNodeParams* cynodeParams_ptr cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 1: - cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) - if cypDependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - else: - for idx in range(len(pDependencies)): - cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] - elif len(pDependencies) == 1: - cypDependencies = (pDependencies[0])._pvt_ptr - if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) - cdef cyruntime.cudaExternalSemaphoreWaitNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL - with nogil: - err = cyruntime.cudaGraphAddExternalSemaphoresWaitNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cynodeParams_ptr) - if len(pDependencies) > 1 and cypDependencies is not NULL: - free(cypDependencies) + cdef cyruntime.cudaGraph_t cygraph + cdef cudaGraphNode_t pGraphNode + try: + pGraphNode = cudaGraphNode_t() + if graph is None: + pgraph = 0 + elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): + pgraph = int(graph) + else: + pgraph = int(cudaGraph_t(graph)) + cygraph = pgraph + pDependencies = [] if pDependencies is None else pDependencies + if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): + raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") + if len(pDependencies) > 1: + cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) + if cypDependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + else: + for idx in range(len(pDependencies)): + cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr + if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + with nogil: + err = cyruntime.cudaGraphAddExternalSemaphoresWaitNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cynodeParams_ptr) + finally: + if len(pDependencies) > 1 and cypDependencies is not NULL: + free(cypDependencies) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, pGraphNode) @@ -34233,6 +34617,7 @@ def cudaGraphExternalSemaphoresWaitNodeGetParams(hNode): -------- :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaGraphExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync` """ + cdef cudaExternalSemaphoreWaitNodeParams params_out cdef cyruntime.cudaGraphNode_t cyhNode if hNode is None: phNode = 0 @@ -34241,7 +34626,7 @@ def cudaGraphExternalSemaphoresWaitNodeGetParams(hNode): else: phNode = int(cudaGraphNode_t(hNode)) cyhNode = phNode - cdef cudaExternalSemaphoreWaitNodeParams params_out = cudaExternalSemaphoreWaitNodeParams() + params_out = cudaExternalSemaphoreWaitNodeParams() with nogil: err = cyruntime.cudaGraphExternalSemaphoresWaitNodeGetParams(cyhNode, params_out._pvt_ptr) if err != cyruntime.cudaSuccess: @@ -34274,6 +34659,7 @@ def cudaGraphExternalSemaphoresWaitNodeSetParams(hNode, nodeParams : Optional[cu -------- :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaGraphExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync` """ + cdef cyruntime.cudaExternalSemaphoreWaitNodeParams* cynodeParams_ptr cdef cyruntime.cudaGraphNode_t cyhNode if hNode is None: phNode = 0 @@ -34282,7 +34668,7 @@ def cudaGraphExternalSemaphoresWaitNodeSetParams(hNode, nodeParams : Optional[cu else: phNode = int(cudaGraphNode_t(hNode)) cyhNode = phNode - cdef cyruntime.cudaExternalSemaphoreWaitNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL with nogil: err = cyruntime.cudaGraphExternalSemaphoresWaitNodeSetParams(cyhNode, cynodeParams_ptr) return (_cudaError_t(err),) @@ -34366,34 +34752,38 @@ def cudaGraphAddMemAllocNode(graph, pDependencies : Optional[tuple[cudaGraphNode -------- :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphAddMemFreeNode`, :py:obj:`~.cudaGraphMemAllocNodeGetParams`, :py:obj:`~.cudaDeviceGraphMemTrim`, :py:obj:`~.cudaDeviceGetGraphMemAttribute`, :py:obj:`~.cudaDeviceSetGraphMemAttribute`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode` """ - pDependencies = [] if pDependencies is None else pDependencies - if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): - raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") - cdef cyruntime.cudaGraph_t cygraph - if graph is None: - pgraph = 0 - elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): - pgraph = int(graph) - else: - pgraph = int(cudaGraph_t(graph)) - cygraph = pgraph - cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() + cdef cyruntime.cudaMemAllocNodeParams* cynodeParams_ptr cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 1: - cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) - if cypDependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - else: - for idx in range(len(pDependencies)): - cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] - elif len(pDependencies) == 1: - cypDependencies = (pDependencies[0])._pvt_ptr - if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) - cdef cyruntime.cudaMemAllocNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL - with nogil: - err = cyruntime.cudaGraphAddMemAllocNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cynodeParams_ptr) - if len(pDependencies) > 1 and cypDependencies is not NULL: - free(cypDependencies) + cdef cyruntime.cudaGraph_t cygraph + cdef cudaGraphNode_t pGraphNode + try: + pGraphNode = cudaGraphNode_t() + if graph is None: + pgraph = 0 + elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): + pgraph = int(graph) + else: + pgraph = int(cudaGraph_t(graph)) + cygraph = pgraph + pDependencies = [] if pDependencies is None else pDependencies + if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): + raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") + if len(pDependencies) > 1: + cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) + if cypDependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + else: + for idx in range(len(pDependencies)): + cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr + if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + with nogil: + err = cyruntime.cudaGraphAddMemAllocNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cynodeParams_ptr) + finally: + if len(pDependencies) > 1 and cypDependencies is not NULL: + free(cypDependencies) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, pGraphNode) @@ -34426,6 +34816,7 @@ def cudaGraphMemAllocNodeGetParams(node): -------- :py:obj:`~.cudaGraphAddMemAllocNode`, :py:obj:`~.cudaGraphMemFreeNodeGetParams` """ + cdef cudaMemAllocNodeParams params_out cdef cyruntime.cudaGraphNode_t cynode if node is None: pnode = 0 @@ -34434,7 +34825,7 @@ def cudaGraphMemAllocNodeGetParams(node): else: pnode = int(cudaGraphNode_t(node)) cynode = pnode - cdef cudaMemAllocNodeParams params_out = cudaMemAllocNodeParams() + params_out = cudaMemAllocNodeParams() with nogil: err = cyruntime.cudaGraphMemAllocNodeGetParams(cynode, params_out._pvt_ptr) if err != cyruntime.cudaSuccess: @@ -34498,36 +34889,40 @@ def cudaGraphAddMemFreeNode(graph, pDependencies : Optional[tuple[cudaGraphNode_ -------- :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphAddMemAllocNode`, :py:obj:`~.cudaGraphMemFreeNodeGetParams`, :py:obj:`~.cudaDeviceGraphMemTrim`, :py:obj:`~.cudaDeviceGetGraphMemAttribute`, :py:obj:`~.cudaDeviceSetGraphMemAttribute`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode` """ - pDependencies = [] if pDependencies is None else pDependencies - if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): - raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") - cdef cyruntime.cudaGraph_t cygraph - if graph is None: - pgraph = 0 - elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): - pgraph = int(graph) - else: - pgraph = int(cudaGraph_t(graph)) - cygraph = pgraph - cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() - cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 1: - cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) - if cypDependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - else: - for idx in range(len(pDependencies)): - cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] - elif len(pDependencies) == 1: - cypDependencies = (pDependencies[0])._pvt_ptr - if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) cdef _HelperInputVoidPtrStruct cydptrHelper - cdef void* cydptr = _helper_input_void_ptr(dptr, &cydptrHelper) - with nogil: - err = cyruntime.cudaGraphAddMemFreeNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cydptr) - if len(pDependencies) > 1 and cypDependencies is not NULL: - free(cypDependencies) - _helper_input_void_ptr_free(&cydptrHelper) + cdef void* cydptr + cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL + cdef cyruntime.cudaGraph_t cygraph + cdef cudaGraphNode_t pGraphNode + try: + pGraphNode = cudaGraphNode_t() + if graph is None: + pgraph = 0 + elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): + pgraph = int(graph) + else: + pgraph = int(cudaGraph_t(graph)) + cygraph = pgraph + pDependencies = [] if pDependencies is None else pDependencies + if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): + raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") + if len(pDependencies) > 1: + cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) + if cypDependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + else: + for idx in range(len(pDependencies)): + cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr + if numDependencies > len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies)) + cydptr = _helper_input_void_ptr(dptr, &cydptrHelper) + with nogil: + err = cyruntime.cudaGraphAddMemFreeNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cydptr) + finally: + if len(pDependencies) > 1 and cypDependencies is not NULL: + free(cypDependencies) + _helper_input_void_ptr_free(&cydptrHelper) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, pGraphNode) @@ -34557,6 +34952,8 @@ def cudaGraphMemFreeNodeGetParams(node): -------- :py:obj:`~.cudaGraphAddMemFreeNode`, :py:obj:`~.cudaGraphMemFreeNodeGetParams` """ + cdef void_ptr dptr_out = 0 + cdef void* cydptr_out_ptr = &dptr_out cdef cyruntime.cudaGraphNode_t cynode if node is None: pnode = 0 @@ -34565,8 +34962,6 @@ def cudaGraphMemFreeNodeGetParams(node): else: pnode = int(cudaGraphNode_t(node)) cynode = pnode - cdef void_ptr dptr_out = 0 - cdef void* cydptr_out_ptr = &dptr_out with nogil: err = cyruntime.cudaGraphMemFreeNodeGetParams(cynode, cydptr_out_ptr) if err != cyruntime.cudaSuccess: @@ -34644,9 +35039,12 @@ def cudaDeviceGetGraphMemAttribute(int device, attr not None : cudaGraphMemAttri -------- :py:obj:`~.cudaDeviceSetGraphMemAttribute`, :py:obj:`~.cudaGraphAddMemAllocNode`, :py:obj:`~.cudaGraphAddMemFreeNode`, :py:obj:`~.cudaDeviceGraphMemTrim`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync` """ - cdef cyruntime.cudaGraphMemAttributeType cyattr = int(attr) - cdef _HelperCUgraphMem_attribute cyvalue = _HelperCUgraphMem_attribute(attr, 0, is_getter=True) - cdef void* cyvalue_ptr = cyvalue.cptr + cdef _HelperCUgraphMem_attribute cyvalue + cdef void* cyvalue_ptr + cdef cyruntime.cudaGraphMemAttributeType cyattr + cyattr = int(attr) + cyvalue = _HelperCUgraphMem_attribute(attr, 0, is_getter=True) + cyvalue_ptr = cyvalue.cptr with nogil: err = cyruntime.cudaDeviceGetGraphMemAttribute(device, cyattr, cyvalue_ptr) if err != cyruntime.cudaSuccess: @@ -34688,9 +35086,12 @@ def cudaDeviceSetGraphMemAttribute(int device, attr not None : cudaGraphMemAttri -------- :py:obj:`~.cudaDeviceGetGraphMemAttribute`, :py:obj:`~.cudaGraphAddMemAllocNode`, :py:obj:`~.cudaGraphAddMemFreeNode`, :py:obj:`~.cudaDeviceGraphMemTrim`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync` """ - cdef cyruntime.cudaGraphMemAttributeType cyattr = int(attr) - cdef _HelperCUgraphMem_attribute cyvalue = _HelperCUgraphMem_attribute(attr, value, is_getter=False) - cdef void* cyvalue_ptr = cyvalue.cptr + cdef _HelperCUgraphMem_attribute cyvalue + cdef void* cyvalue_ptr + cdef cyruntime.cudaGraphMemAttributeType cyattr + cyattr = int(attr) + cyvalue = _HelperCUgraphMem_attribute(attr, value, is_getter=False) + cyvalue_ptr = cyvalue.cptr with nogil: err = cyruntime.cudaDeviceSetGraphMemAttribute(device, cyattr, cyvalue_ptr) return (_cudaError_t(err),) @@ -34731,6 +35132,8 @@ def cudaGraphClone(originalGraph): : Cloning is not supported for graphs which contain memory allocation nodes, memory free nodes, or conditional nodes. """ cdef cyruntime.cudaGraph_t cyoriginalGraph + cdef cudaGraph_t pGraphClone + pGraphClone = cudaGraph_t() if originalGraph is None: poriginalGraph = 0 elif isinstance(originalGraph, (cudaGraph_t,driver.CUgraph)): @@ -34738,7 +35141,6 @@ def cudaGraphClone(originalGraph): else: poriginalGraph = int(cudaGraph_t(originalGraph)) cyoriginalGraph = poriginalGraph - cdef cudaGraph_t pGraphClone = cudaGraph_t() with nogil: err = cyruntime.cudaGraphClone(pGraphClone._pvt_ptr, cyoriginalGraph) if err != cyruntime.cudaSuccess: @@ -34780,14 +35182,9 @@ def cudaGraphNodeFindInClone(originalNode, clonedGraph): :py:obj:`~.cudaGraphClone` """ cdef cyruntime.cudaGraph_t cyclonedGraph - if clonedGraph is None: - pclonedGraph = 0 - elif isinstance(clonedGraph, (cudaGraph_t,driver.CUgraph)): - pclonedGraph = int(clonedGraph) - else: - pclonedGraph = int(cudaGraph_t(clonedGraph)) - cyclonedGraph = pclonedGraph cdef cyruntime.cudaGraphNode_t cyoriginalNode + cdef cudaGraphNode_t pNode + pNode = cudaGraphNode_t() if originalNode is None: poriginalNode = 0 elif isinstance(originalNode, (cudaGraphNode_t,driver.CUgraphNode)): @@ -34795,7 +35192,13 @@ def cudaGraphNodeFindInClone(originalNode, clonedGraph): else: poriginalNode = int(cudaGraphNode_t(originalNode)) cyoriginalNode = poriginalNode - cdef cudaGraphNode_t pNode = cudaGraphNode_t() + if clonedGraph is None: + pclonedGraph = 0 + elif isinstance(clonedGraph, (cudaGraph_t,driver.CUgraph)): + pclonedGraph = int(clonedGraph) + else: + pclonedGraph = int(cudaGraph_t(clonedGraph)) + cyclonedGraph = pclonedGraph with nogil: err = cyruntime.cudaGraphNodeFindInClone(pNode._pvt_ptr, cyoriginalNode, cyclonedGraph) if err != cyruntime.cudaSuccess: @@ -34827,6 +35230,7 @@ def cudaGraphNodeGetType(node): -------- :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphChildGraphNodeGetGraph`, :py:obj:`~.cudaGraphKernelNodeGetParams`, :py:obj:`~.cudaGraphKernelNodeSetParams`, :py:obj:`~.cudaGraphHostNodeGetParams`, :py:obj:`~.cudaGraphHostNodeSetParams`, :py:obj:`~.cudaGraphMemcpyNodeGetParams`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`, :py:obj:`~.cudaGraphMemsetNodeGetParams`, :py:obj:`~.cudaGraphMemsetNodeSetParams` """ + cdef cyruntime.cudaGraphNodeType pType cdef cyruntime.cudaGraphNode_t cynode if node is None: pnode = 0 @@ -34835,7 +35239,6 @@ def cudaGraphNodeGetType(node): else: pnode = int(cudaGraphNode_t(node)) cynode = pnode - cdef cyruntime.cudaGraphNodeType pType with nogil: err = cyruntime.cudaGraphNodeGetType(cynode, &pType) if err != cyruntime.cudaSuccess: @@ -34868,6 +35271,7 @@ def cudaGraphNodeGetContainingGraph(hNode): -------- :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphDebugDotPrint` :py:obj:`~.cudaGraphNodeGetLocalId` :py:obj:`~.cudaGraphNodeGetToolsId` :py:obj:`~.cudaGraphGetId` :py:obj:`~.cudaGraphExecGetId` """ + cdef cudaGraph_t phGraph cdef cyruntime.cudaGraphNode_t cyhNode if hNode is None: phNode = 0 @@ -34876,7 +35280,7 @@ def cudaGraphNodeGetContainingGraph(hNode): else: phNode = int(cudaGraphNode_t(hNode)) cyhNode = phNode - cdef cudaGraph_t phGraph = cudaGraph_t() + phGraph = cudaGraph_t() with nogil: err = cyruntime.cudaGraphNodeGetContainingGraph(cyhNode, phGraph._pvt_ptr) if err != cyruntime.cudaSuccess: @@ -34910,6 +35314,7 @@ def cudaGraphNodeGetLocalId(hNode): -------- :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphDebugDotPrint` :py:obj:`~.cudaGraphNodeGetContainingGraph` :py:obj:`~.cudaGraphNodeGetToolsId` :py:obj:`~.cudaGraphGetId` :py:obj:`~.cudaGraphExecGetId` """ + cdef unsigned int nodeId = 0 cdef cyruntime.cudaGraphNode_t cyhNode if hNode is None: phNode = 0 @@ -34918,7 +35323,6 @@ def cudaGraphNodeGetLocalId(hNode): else: phNode = int(cudaGraphNode_t(hNode)) cyhNode = phNode - cdef unsigned int nodeId = 0 with nogil: err = cyruntime.cudaGraphNodeGetLocalId(cyhNode, &nodeId) if err != cyruntime.cudaSuccess: @@ -34948,6 +35352,7 @@ def cudaGraphNodeGetToolsId(hNode): -------- :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphDebugDotPrint` :py:obj:`~.cudaGraphNodeGetContainingGraph` :py:obj:`~.cudaGraphNodeGetLocalId` :py:obj:`~.cudaGraphGetId` :py:obj:`~.cudaGraphExecGetId` """ + cdef unsigned long long toolsNodeId = 0 cdef cyruntime.cudaGraphNode_t cyhNode if hNode is None: phNode = 0 @@ -34956,7 +35361,6 @@ def cudaGraphNodeGetToolsId(hNode): else: phNode = int(cudaGraphNode_t(hNode)) cyhNode = phNode - cdef unsigned long long toolsNodeId = 0 with nogil: err = cyruntime.cudaGraphNodeGetToolsId(cyhNode, &toolsNodeId) if err != cyruntime.cudaSuccess: @@ -34989,6 +35393,7 @@ def cudaGraphGetId(hGraph): -------- :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphDebugDotPrint` :py:obj:`~.cudaGraphNodeGetContainingGraph` :py:obj:`~.cudaGraphNodeGetLocalId` :py:obj:`~.cudaGraphNodeGetToolsId` :py:obj:`~.cudaGraphExecGetId` """ + cdef unsigned int graphID = 0 cdef cyruntime.cudaGraph_t cyhGraph if hGraph is None: phGraph = 0 @@ -34997,7 +35402,6 @@ def cudaGraphGetId(hGraph): else: phGraph = int(cudaGraph_t(hGraph)) cyhGraph = phGraph - cdef unsigned int graphID = 0 with nogil: err = cyruntime.cudaGraphGetId(cyhGraph, &graphID) if err != cyruntime.cudaSuccess: @@ -35030,6 +35434,7 @@ def cudaGraphExecGetId(hGraphExec): -------- :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphDebugDotPrint` :py:obj:`~.cudaGraphNodeGetContainingGraph` :py:obj:`~.cudaGraphNodeGetLocalId` :py:obj:`~.cudaGraphNodeGetToolsId` :py:obj:`~.cudaGraphGetId` """ + cdef unsigned int graphID = 0 cdef cyruntime.cudaGraphExec_t cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -35038,7 +35443,6 @@ def cudaGraphExecGetId(hGraphExec): else: phGraphExec = int(cudaGraphExec_t(hGraphExec)) cyhGraphExec = phGraphExec - cdef unsigned int graphID = 0 with nogil: err = cyruntime.cudaGraphExecGetId(cyhGraphExec, &graphID) if err != cyruntime.cudaSuccess: @@ -35080,26 +35484,28 @@ def cudaGraphGetNodes(graph, size_t numNodes = 0): :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphNodeGetType`, :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphNodeGetDependentNodes` """ cdef size_t _graph_length = numNodes - cdef cyruntime.cudaGraph_t cygraph - if graph is None: - pgraph = 0 - elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): - pgraph = int(graph) - else: - pgraph = int(cudaGraph_t(graph)) - cygraph = pgraph cdef cyruntime.cudaGraphNode_t* cynodes = NULL pynodes = [] - if _graph_length != 0: - cynodes = calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t)) - if cynodes is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - with nogil: - err = cyruntime.cudaGraphGetNodes(cygraph, cynodes, &numNodes) - if cudaError_t(err) == cudaError_t(0): - pynodes = [cudaGraphNode_t(init_value=cynodes[idx]) for idx in range(_graph_length)] - if cynodes is not NULL: - free(cynodes) + cdef cyruntime.cudaGraph_t cygraph + try: + if graph is None: + pgraph = 0 + elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): + pgraph = int(graph) + else: + pgraph = int(cudaGraph_t(graph)) + cygraph = pgraph + if _graph_length != 0: + cynodes = calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t)) + if cynodes is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + with nogil: + err = cyruntime.cudaGraphGetNodes(cygraph, cynodes, &numNodes) + finally: + if cudaError_t(err) == cudaError_t(0): + pynodes = [cudaGraphNode_t(init_value=cynodes[idx]) for idx in range(_graph_length)] + if cynodes is not NULL: + free(cynodes) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None, None) return (_cudaError_t_SUCCESS, pynodes, numNodes) @@ -35139,26 +35545,28 @@ def cudaGraphGetRootNodes(graph, size_t pNumRootNodes = 0): :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphNodeGetType`, :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphNodeGetDependentNodes` """ cdef size_t _graph_length = pNumRootNodes - cdef cyruntime.cudaGraph_t cygraph - if graph is None: - pgraph = 0 - elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): - pgraph = int(graph) - else: - pgraph = int(cudaGraph_t(graph)) - cygraph = pgraph cdef cyruntime.cudaGraphNode_t* cypRootNodes = NULL pypRootNodes = [] - if _graph_length != 0: - cypRootNodes = calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t)) - if cypRootNodes is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - with nogil: - err = cyruntime.cudaGraphGetRootNodes(cygraph, cypRootNodes, &pNumRootNodes) - if cudaError_t(err) == cudaError_t(0): - pypRootNodes = [cudaGraphNode_t(init_value=cypRootNodes[idx]) for idx in range(_graph_length)] - if cypRootNodes is not NULL: - free(cypRootNodes) + cdef cyruntime.cudaGraph_t cygraph + try: + if graph is None: + pgraph = 0 + elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): + pgraph = int(graph) + else: + pgraph = int(cudaGraph_t(graph)) + cygraph = pgraph + if _graph_length != 0: + cypRootNodes = calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t)) + if cypRootNodes is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + with nogil: + err = cyruntime.cudaGraphGetRootNodes(cygraph, cypRootNodes, &pNumRootNodes) + finally: + if cudaError_t(err) == cudaError_t(0): + pypRootNodes = [cudaGraphNode_t(init_value=cypRootNodes[idx]) for idx in range(_graph_length)] + if cypRootNodes is not NULL: + free(cypRootNodes) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None, None) return (_cudaError_t_SUCCESS, pypRootNodes, pNumRootNodes) @@ -35209,46 +35617,48 @@ def cudaGraphGetEdges(graph, size_t numEdges = 0): :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphAddDependencies`, :py:obj:`~.cudaGraphRemoveDependencies`, :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphNodeGetDependentNodes` """ cdef size_t _graph_length = numEdges - cdef cyruntime.cudaGraph_t cygraph - if graph is None: - pgraph = 0 - elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): - pgraph = int(graph) - else: - pgraph = int(cudaGraph_t(graph)) - cygraph = pgraph - cdef cyruntime.cudaGraphNode_t* cyfrom_ = NULL - pyfrom_ = [] - if _graph_length != 0: - cyfrom_ = calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t)) - if cyfrom_ is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - cdef cyruntime.cudaGraphNode_t* cyto = NULL - pyto = [] - if _graph_length != 0: - cyto = calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t)) - if cyto is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) cdef cyruntime.cudaGraphEdgeData* cyedgeData = NULL pyedgeData = [] - if _graph_length != 0: - cyedgeData = calloc(_graph_length, sizeof(cyruntime.cudaGraphEdgeData)) - if cyedgeData is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData))) - with nogil: - err = cyruntime.cudaGraphGetEdges(cygraph, cyfrom_, cyto, cyedgeData, &numEdges) - if cudaError_t(err) == cudaError_t(0): - pyfrom_ = [cudaGraphNode_t(init_value=cyfrom_[idx]) for idx in range(_graph_length)] - if cyfrom_ is not NULL: - free(cyfrom_) - if cudaError_t(err) == cudaError_t(0): - pyto = [cudaGraphNode_t(init_value=cyto[idx]) for idx in range(_graph_length)] - if cyto is not NULL: - free(cyto) - if cudaError_t(err) == cudaError_t(0): - pyedgeData = [cudaGraphEdgeData(_ptr=&cyedgeData[idx]) for idx in range(_graph_length)] - if cyedgeData is not NULL: - free(cyedgeData) + cdef cyruntime.cudaGraphNode_t* cyto = NULL + pyto = [] + cdef cyruntime.cudaGraphNode_t* cyfrom_ = NULL + pyfrom_ = [] + cdef cyruntime.cudaGraph_t cygraph + try: + if graph is None: + pgraph = 0 + elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): + pgraph = int(graph) + else: + pgraph = int(cudaGraph_t(graph)) + cygraph = pgraph + if _graph_length != 0: + cyfrom_ = calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t)) + if cyfrom_ is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + if _graph_length != 0: + cyto = calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t)) + if cyto is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + if _graph_length != 0: + cyedgeData = calloc(_graph_length, sizeof(cyruntime.cudaGraphEdgeData)) + if cyedgeData is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData))) + with nogil: + err = cyruntime.cudaGraphGetEdges(cygraph, cyfrom_, cyto, cyedgeData, &numEdges) + finally: + if cudaError_t(err) == cudaError_t(0): + pyfrom_ = [cudaGraphNode_t(init_value=cyfrom_[idx]) for idx in range(_graph_length)] + if cyfrom_ is not NULL: + free(cyfrom_) + if cudaError_t(err) == cudaError_t(0): + pyto = [cudaGraphNode_t(init_value=cyto[idx]) for idx in range(_graph_length)] + if cyto is not NULL: + free(cyto) + if cudaError_t(err) == cudaError_t(0): + pyedgeData = [cudaGraphEdgeData(_ptr=&cyedgeData[idx]) for idx in range(_graph_length)] + if cyedgeData is not NULL: + free(cyedgeData) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None, None, None, None) return (_cudaError_t_SUCCESS, pyfrom_, pyto, pyedgeData, numEdges) @@ -35296,36 +35706,38 @@ def cudaGraphNodeGetDependencies(node, size_t pNumDependencies = 0): :py:obj:`~.cudaGraphNodeGetDependentNodes`, :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphAddDependencies`, :py:obj:`~.cudaGraphRemoveDependencies` """ cdef size_t _graph_length = pNumDependencies - cdef cyruntime.cudaGraphNode_t cynode - if node is None: - pnode = 0 - elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)): - pnode = int(node) - else: - pnode = int(cudaGraphNode_t(node)) - cynode = pnode - cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - pypDependencies = [] - if _graph_length != 0: - cypDependencies = calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t)) - if cypDependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) cdef cyruntime.cudaGraphEdgeData* cyedgeData = NULL pyedgeData = [] - if _graph_length != 0: - cyedgeData = calloc(_graph_length, sizeof(cyruntime.cudaGraphEdgeData)) - if cyedgeData is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData))) - with nogil: - err = cyruntime.cudaGraphNodeGetDependencies(cynode, cypDependencies, cyedgeData, &pNumDependencies) - if cudaError_t(err) == cudaError_t(0): - pypDependencies = [cudaGraphNode_t(init_value=cypDependencies[idx]) for idx in range(_graph_length)] - if cypDependencies is not NULL: - free(cypDependencies) - if cudaError_t(err) == cudaError_t(0): - pyedgeData = [cudaGraphEdgeData(_ptr=&cyedgeData[idx]) for idx in range(_graph_length)] - if cyedgeData is not NULL: - free(cyedgeData) + cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL + pypDependencies = [] + cdef cyruntime.cudaGraphNode_t cynode + try: + if node is None: + pnode = 0 + elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)): + pnode = int(node) + else: + pnode = int(cudaGraphNode_t(node)) + cynode = pnode + if _graph_length != 0: + cypDependencies = calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t)) + if cypDependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + if _graph_length != 0: + cyedgeData = calloc(_graph_length, sizeof(cyruntime.cudaGraphEdgeData)) + if cyedgeData is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData))) + with nogil: + err = cyruntime.cudaGraphNodeGetDependencies(cynode, cypDependencies, cyedgeData, &pNumDependencies) + finally: + if cudaError_t(err) == cudaError_t(0): + pypDependencies = [cudaGraphNode_t(init_value=cypDependencies[idx]) for idx in range(_graph_length)] + if cypDependencies is not NULL: + free(cypDependencies) + if cudaError_t(err) == cudaError_t(0): + pyedgeData = [cudaGraphEdgeData(_ptr=&cyedgeData[idx]) for idx in range(_graph_length)] + if cyedgeData is not NULL: + free(cyedgeData) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None, None, None) return (_cudaError_t_SUCCESS, pypDependencies, pyedgeData, pNumDependencies) @@ -35373,36 +35785,38 @@ def cudaGraphNodeGetDependentNodes(node, size_t pNumDependentNodes = 0): :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphAddDependencies`, :py:obj:`~.cudaGraphRemoveDependencies` """ cdef size_t _graph_length = pNumDependentNodes - cdef cyruntime.cudaGraphNode_t cynode - if node is None: - pnode = 0 - elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)): - pnode = int(node) - else: - pnode = int(cudaGraphNode_t(node)) - cynode = pnode - cdef cyruntime.cudaGraphNode_t* cypDependentNodes = NULL - pypDependentNodes = [] - if _graph_length != 0: - cypDependentNodes = calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t)) - if cypDependentNodes is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) cdef cyruntime.cudaGraphEdgeData* cyedgeData = NULL pyedgeData = [] - if _graph_length != 0: - cyedgeData = calloc(_graph_length, sizeof(cyruntime.cudaGraphEdgeData)) - if cyedgeData is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData))) - with nogil: - err = cyruntime.cudaGraphNodeGetDependentNodes(cynode, cypDependentNodes, cyedgeData, &pNumDependentNodes) - if cudaError_t(err) == cudaError_t(0): - pypDependentNodes = [cudaGraphNode_t(init_value=cypDependentNodes[idx]) for idx in range(_graph_length)] - if cypDependentNodes is not NULL: - free(cypDependentNodes) - if cudaError_t(err) == cudaError_t(0): - pyedgeData = [cudaGraphEdgeData(_ptr=&cyedgeData[idx]) for idx in range(_graph_length)] - if cyedgeData is not NULL: - free(cyedgeData) + cdef cyruntime.cudaGraphNode_t* cypDependentNodes = NULL + pypDependentNodes = [] + cdef cyruntime.cudaGraphNode_t cynode + try: + if node is None: + pnode = 0 + elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)): + pnode = int(node) + else: + pnode = int(cudaGraphNode_t(node)) + cynode = pnode + if _graph_length != 0: + cypDependentNodes = calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t)) + if cypDependentNodes is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + if _graph_length != 0: + cyedgeData = calloc(_graph_length, sizeof(cyruntime.cudaGraphEdgeData)) + if cyedgeData is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData))) + with nogil: + err = cyruntime.cudaGraphNodeGetDependentNodes(cynode, cypDependentNodes, cyedgeData, &pNumDependentNodes) + finally: + if cudaError_t(err) == cudaError_t(0): + pypDependentNodes = [cudaGraphNode_t(init_value=cypDependentNodes[idx]) for idx in range(_graph_length)] + if cypDependentNodes is not NULL: + free(cypDependentNodes) + if cudaError_t(err) == cudaError_t(0): + pyedgeData = [cudaGraphEdgeData(_ptr=&cyedgeData[idx]) for idx in range(_graph_length)] + if cyedgeData is not NULL: + free(cyedgeData) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None, None, None) return (_cudaError_t_SUCCESS, pypDependentNodes, pyedgeData, pNumDependentNodes) @@ -35444,60 +35858,62 @@ def cudaGraphAddDependencies(graph, from_ : Optional[tuple[cudaGraphNode_t] | li -------- :py:obj:`~.cudaGraphRemoveDependencies`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphNodeGetDependentNodes` """ - edgeData = [] if edgeData is None else edgeData - if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in edgeData): - raise TypeError("Argument 'edgeData' is not instance of type (expected tuple[cyruntime.cudaGraphEdgeData,] or list[cyruntime.cudaGraphEdgeData,]") - to = [] if to is None else to - if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in to): - raise TypeError("Argument 'to' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") - from_ = [] if from_ is None else from_ - if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in from_): - raise TypeError("Argument 'from_' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") - cdef cyruntime.cudaGraph_t cygraph - if graph is None: - pgraph = 0 - elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): - pgraph = int(graph) - else: - pgraph = int(cudaGraph_t(graph)) - cygraph = pgraph - cdef cyruntime.cudaGraphNode_t* cyfrom_ = NULL - if len(from_) > 1: - cyfrom_ = calloc(len(from_), sizeof(cyruntime.cudaGraphNode_t)) - if cyfrom_ is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - else: - for idx in range(len(from_)): - cyfrom_[idx] = (from_[idx])._pvt_ptr[0] - elif len(from_) == 1: - cyfrom_ = (from_[0])._pvt_ptr - cdef cyruntime.cudaGraphNode_t* cyto = NULL - if len(to) > 1: - cyto = calloc(len(to), sizeof(cyruntime.cudaGraphNode_t)) - if cyto is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - else: - for idx in range(len(to)): - cyto[idx] = (to[idx])._pvt_ptr[0] - elif len(to) == 1: - cyto = (to[0])._pvt_ptr cdef cyruntime.cudaGraphEdgeData* cyedgeData = NULL - if len(edgeData) > 1: - cyedgeData = calloc(len(edgeData), sizeof(cyruntime.cudaGraphEdgeData)) - if cyedgeData is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(edgeData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData))) - for idx in range(len(edgeData)): - string.memcpy(&cyedgeData[idx], (edgeData[idx])._pvt_ptr, sizeof(cyruntime.cudaGraphEdgeData)) - elif len(edgeData) == 1: - cyedgeData = (edgeData[0])._pvt_ptr - with nogil: - err = cyruntime.cudaGraphAddDependencies(cygraph, cyfrom_, cyto, cyedgeData, numDependencies) - if len(from_) > 1 and cyfrom_ is not NULL: - free(cyfrom_) - if len(to) > 1 and cyto is not NULL: - free(cyto) - if len(edgeData) > 1 and cyedgeData is not NULL: - free(cyedgeData) + cdef cyruntime.cudaGraphNode_t* cyto = NULL + cdef cyruntime.cudaGraphNode_t* cyfrom_ = NULL + cdef cyruntime.cudaGraph_t cygraph + try: + if graph is None: + pgraph = 0 + elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): + pgraph = int(graph) + else: + pgraph = int(cudaGraph_t(graph)) + cygraph = pgraph + from_ = [] if from_ is None else from_ + if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in from_): + raise TypeError("Argument 'from_' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") + if len(from_) > 1: + cyfrom_ = calloc(len(from_), sizeof(cyruntime.cudaGraphNode_t)) + if cyfrom_ is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + else: + for idx in range(len(from_)): + cyfrom_[idx] = (from_[idx])._pvt_ptr[0] + elif len(from_) == 1: + cyfrom_ = (from_[0])._pvt_ptr + to = [] if to is None else to + if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in to): + raise TypeError("Argument 'to' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") + if len(to) > 1: + cyto = calloc(len(to), sizeof(cyruntime.cudaGraphNode_t)) + if cyto is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + else: + for idx in range(len(to)): + cyto[idx] = (to[idx])._pvt_ptr[0] + elif len(to) == 1: + cyto = (to[0])._pvt_ptr + edgeData = [] if edgeData is None else edgeData + if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in edgeData): + raise TypeError("Argument 'edgeData' is not instance of type (expected tuple[cyruntime.cudaGraphEdgeData,] or list[cyruntime.cudaGraphEdgeData,]") + if len(edgeData) > 1: + cyedgeData = calloc(len(edgeData), sizeof(cyruntime.cudaGraphEdgeData)) + if cyedgeData is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(edgeData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData))) + for idx in range(len(edgeData)): + string.memcpy(&cyedgeData[idx], (edgeData[idx])._pvt_ptr, sizeof(cyruntime.cudaGraphEdgeData)) + elif len(edgeData) == 1: + cyedgeData = (edgeData[0])._pvt_ptr + with nogil: + err = cyruntime.cudaGraphAddDependencies(cygraph, cyfrom_, cyto, cyedgeData, numDependencies) + finally: + if len(from_) > 1 and cyfrom_ is not NULL: + free(cyfrom_) + if len(to) > 1 and cyto is not NULL: + free(cyto) + if len(edgeData) > 1 and cyedgeData is not NULL: + free(cyedgeData) return (_cudaError_t(err),) {{endif}} @@ -35540,60 +35956,62 @@ def cudaGraphRemoveDependencies(graph, from_ : Optional[tuple[cudaGraphNode_t] | -------- :py:obj:`~.cudaGraphAddDependencies`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphNodeGetDependentNodes` """ - edgeData = [] if edgeData is None else edgeData - if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in edgeData): - raise TypeError("Argument 'edgeData' is not instance of type (expected tuple[cyruntime.cudaGraphEdgeData,] or list[cyruntime.cudaGraphEdgeData,]") - to = [] if to is None else to - if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in to): - raise TypeError("Argument 'to' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") - from_ = [] if from_ is None else from_ - if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in from_): - raise TypeError("Argument 'from_' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") - cdef cyruntime.cudaGraph_t cygraph - if graph is None: - pgraph = 0 - elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): - pgraph = int(graph) - else: - pgraph = int(cudaGraph_t(graph)) - cygraph = pgraph - cdef cyruntime.cudaGraphNode_t* cyfrom_ = NULL - if len(from_) > 1: - cyfrom_ = calloc(len(from_), sizeof(cyruntime.cudaGraphNode_t)) - if cyfrom_ is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - else: - for idx in range(len(from_)): - cyfrom_[idx] = (from_[idx])._pvt_ptr[0] - elif len(from_) == 1: - cyfrom_ = (from_[0])._pvt_ptr - cdef cyruntime.cudaGraphNode_t* cyto = NULL - if len(to) > 1: - cyto = calloc(len(to), sizeof(cyruntime.cudaGraphNode_t)) - if cyto is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - else: - for idx in range(len(to)): - cyto[idx] = (to[idx])._pvt_ptr[0] - elif len(to) == 1: - cyto = (to[0])._pvt_ptr cdef cyruntime.cudaGraphEdgeData* cyedgeData = NULL - if len(edgeData) > 1: - cyedgeData = calloc(len(edgeData), sizeof(cyruntime.cudaGraphEdgeData)) - if cyedgeData is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(edgeData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData))) - for idx in range(len(edgeData)): - string.memcpy(&cyedgeData[idx], (edgeData[idx])._pvt_ptr, sizeof(cyruntime.cudaGraphEdgeData)) - elif len(edgeData) == 1: - cyedgeData = (edgeData[0])._pvt_ptr - with nogil: - err = cyruntime.cudaGraphRemoveDependencies(cygraph, cyfrom_, cyto, cyedgeData, numDependencies) - if len(from_) > 1 and cyfrom_ is not NULL: - free(cyfrom_) - if len(to) > 1 and cyto is not NULL: - free(cyto) - if len(edgeData) > 1 and cyedgeData is not NULL: - free(cyedgeData) + cdef cyruntime.cudaGraphNode_t* cyto = NULL + cdef cyruntime.cudaGraphNode_t* cyfrom_ = NULL + cdef cyruntime.cudaGraph_t cygraph + try: + if graph is None: + pgraph = 0 + elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): + pgraph = int(graph) + else: + pgraph = int(cudaGraph_t(graph)) + cygraph = pgraph + from_ = [] if from_ is None else from_ + if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in from_): + raise TypeError("Argument 'from_' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") + if len(from_) > 1: + cyfrom_ = calloc(len(from_), sizeof(cyruntime.cudaGraphNode_t)) + if cyfrom_ is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + else: + for idx in range(len(from_)): + cyfrom_[idx] = (from_[idx])._pvt_ptr[0] + elif len(from_) == 1: + cyfrom_ = (from_[0])._pvt_ptr + to = [] if to is None else to + if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in to): + raise TypeError("Argument 'to' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") + if len(to) > 1: + cyto = calloc(len(to), sizeof(cyruntime.cudaGraphNode_t)) + if cyto is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + else: + for idx in range(len(to)): + cyto[idx] = (to[idx])._pvt_ptr[0] + elif len(to) == 1: + cyto = (to[0])._pvt_ptr + edgeData = [] if edgeData is None else edgeData + if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in edgeData): + raise TypeError("Argument 'edgeData' is not instance of type (expected tuple[cyruntime.cudaGraphEdgeData,] or list[cyruntime.cudaGraphEdgeData,]") + if len(edgeData) > 1: + cyedgeData = calloc(len(edgeData), sizeof(cyruntime.cudaGraphEdgeData)) + if cyedgeData is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(edgeData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData))) + for idx in range(len(edgeData)): + string.memcpy(&cyedgeData[idx], (edgeData[idx])._pvt_ptr, sizeof(cyruntime.cudaGraphEdgeData)) + elif len(edgeData) == 1: + cyedgeData = (edgeData[0])._pvt_ptr + with nogil: + err = cyruntime.cudaGraphRemoveDependencies(cygraph, cyfrom_, cyto, cyedgeData, numDependencies) + finally: + if len(from_) > 1 and cyfrom_ is not NULL: + free(cyfrom_) + if len(to) > 1 and cyto is not NULL: + free(cyto) + if len(edgeData) > 1 and cyedgeData is not NULL: + free(cyedgeData) return (_cudaError_t(err),) {{endif}} @@ -35725,6 +36143,8 @@ def cudaGraphInstantiate(graph, unsigned long long flags): :py:obj:`~.cudaGraphInstantiateWithFlags`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphUpload`, :py:obj:`~.cudaGraphLaunch`, :py:obj:`~.cudaGraphExecDestroy` """ cdef cyruntime.cudaGraph_t cygraph + cdef cudaGraphExec_t pGraphExec + pGraphExec = cudaGraphExec_t() if graph is None: pgraph = 0 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): @@ -35732,7 +36152,6 @@ def cudaGraphInstantiate(graph, unsigned long long flags): else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - cdef cudaGraphExec_t pGraphExec = cudaGraphExec_t() with nogil: err = cyruntime.cudaGraphInstantiate(pGraphExec._pvt_ptr, cygraph, flags) if err != cyruntime.cudaSuccess: @@ -35831,6 +36250,8 @@ def cudaGraphInstantiateWithFlags(graph, unsigned long long flags): :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphUpload`, :py:obj:`~.cudaGraphLaunch`, :py:obj:`~.cudaGraphExecDestroy` """ cdef cyruntime.cudaGraph_t cygraph + cdef cudaGraphExec_t pGraphExec + pGraphExec = cudaGraphExec_t() if graph is None: pgraph = 0 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): @@ -35838,7 +36259,6 @@ def cudaGraphInstantiateWithFlags(graph, unsigned long long flags): else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - cdef cudaGraphExec_t pGraphExec = cudaGraphExec_t() with nogil: err = cyruntime.cudaGraphInstantiateWithFlags(pGraphExec._pvt_ptr, cygraph, flags) if err != cyruntime.cudaSuccess: @@ -35976,7 +36396,10 @@ def cudaGraphInstantiateWithParams(graph, instantiateParams : Optional[cudaGraph -------- :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphInstantiateWithFlags`, :py:obj:`~.cudaGraphExecDestroy` """ + cdef cyruntime.cudaGraphInstantiateParams* cyinstantiateParams_ptr cdef cyruntime.cudaGraph_t cygraph + cdef cudaGraphExec_t pGraphExec + pGraphExec = cudaGraphExec_t() if graph is None: pgraph = 0 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): @@ -35984,8 +36407,7 @@ def cudaGraphInstantiateWithParams(graph, instantiateParams : Optional[cudaGraph else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - cdef cudaGraphExec_t pGraphExec = cudaGraphExec_t() - cdef cyruntime.cudaGraphInstantiateParams* cyinstantiateParams_ptr = instantiateParams._pvt_ptr if instantiateParams is not None else NULL + cyinstantiateParams_ptr = instantiateParams._pvt_ptr if instantiateParams is not None else NULL with nogil: err = cyruntime.cudaGraphInstantiateWithParams(pGraphExec._pvt_ptr, cygraph, cyinstantiateParams_ptr) if err != cyruntime.cudaSuccess: @@ -36020,6 +36442,7 @@ def cudaGraphExecGetFlags(graphExec): -------- :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphInstantiateWithFlags`, :py:obj:`~.cudaGraphInstantiateWithParams` """ + cdef unsigned long long flags = 0 cdef cyruntime.cudaGraphExec_t cygraphExec if graphExec is None: pgraphExec = 0 @@ -36028,7 +36451,6 @@ def cudaGraphExecGetFlags(graphExec): else: pgraphExec = int(cudaGraphExec_t(graphExec)) cygraphExec = pgraphExec - cdef unsigned long long flags = 0 with nogil: err = cyruntime.cudaGraphExecGetFlags(cygraphExec, &flags) if err != cyruntime.cudaSuccess: @@ -36096,14 +36518,8 @@ def cudaGraphExecKernelNodeSetParams(hGraphExec, node, pNodeParams : Optional[cu -------- :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate` """ + cdef cyruntime.cudaKernelNodeParams* cypNodeParams_ptr cdef cyruntime.cudaGraphNode_t cynode - if node is None: - pnode = 0 - elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)): - pnode = int(node) - else: - pnode = int(cudaGraphNode_t(node)) - cynode = pnode cdef cyruntime.cudaGraphExec_t cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -36112,7 +36528,14 @@ def cudaGraphExecKernelNodeSetParams(hGraphExec, node, pNodeParams : Optional[cu else: phGraphExec = int(cudaGraphExec_t(hGraphExec)) cyhGraphExec = phGraphExec - cdef cyruntime.cudaKernelNodeParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL + if node is None: + pnode = 0 + elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)): + pnode = int(node) + else: + pnode = int(cudaGraphNode_t(node)) + cynode = pnode + cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL with nogil: err = cyruntime.cudaGraphExecKernelNodeSetParams(cyhGraphExec, cynode, cypNodeParams_ptr) return (_cudaError_t(err),) @@ -36161,14 +36584,8 @@ def cudaGraphExecMemcpyNodeSetParams(hGraphExec, node, pNodeParams : Optional[cu -------- :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParamsToSymbol`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParamsFromSymbol`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams1D`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate` """ + cdef cyruntime.cudaMemcpy3DParms* cypNodeParams_ptr cdef cyruntime.cudaGraphNode_t cynode - if node is None: - pnode = 0 - elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)): - pnode = int(node) - else: - pnode = int(cudaGraphNode_t(node)) - cynode = pnode cdef cyruntime.cudaGraphExec_t cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -36177,7 +36594,14 @@ def cudaGraphExecMemcpyNodeSetParams(hGraphExec, node, pNodeParams : Optional[cu else: phGraphExec = int(cudaGraphExec_t(hGraphExec)) cyhGraphExec = phGraphExec - cdef cyruntime.cudaMemcpy3DParms* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL + if node is None: + pnode = 0 + elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)): + pnode = int(node) + else: + pnode = int(cudaGraphNode_t(node)) + cynode = pnode + cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL with nogil: err = cyruntime.cudaGraphExecMemcpyNodeSetParams(cyhGraphExec, cynode, cypNodeParams_ptr) return (_cudaError_t(err),) @@ -36230,31 +36654,36 @@ def cudaGraphExecMemcpyNodeSetParams1D(hGraphExec, node, dst, src, size_t count, -------- :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemcpyNode1D`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`, :py:obj:`~.cudaGraphMemcpyNodeSetParams1D`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate` """ + cdef cyruntime.cudaMemcpyKind cykind + cdef _HelperInputVoidPtrStruct cysrcHelper + cdef void* cysrc + cdef _HelperInputVoidPtrStruct cydstHelper + cdef void* cydst cdef cyruntime.cudaGraphNode_t cynode - if node is None: - pnode = 0 - elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)): - pnode = int(node) - else: - pnode = int(cudaGraphNode_t(node)) - cynode = pnode cdef cyruntime.cudaGraphExec_t cyhGraphExec - if hGraphExec is None: - phGraphExec = 0 - elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)): - phGraphExec = int(hGraphExec) - else: - phGraphExec = int(cudaGraphExec_t(hGraphExec)) - cyhGraphExec = phGraphExec - cdef _HelperInputVoidPtrStruct cydstHelper - cdef void* cydst = _helper_input_void_ptr(dst, &cydstHelper) - cdef _HelperInputVoidPtrStruct cysrcHelper - cdef void* cysrc = _helper_input_void_ptr(src, &cysrcHelper) - cdef cyruntime.cudaMemcpyKind cykind = int(kind) - with nogil: - err = cyruntime.cudaGraphExecMemcpyNodeSetParams1D(cyhGraphExec, cynode, cydst, cysrc, count, cykind) - _helper_input_void_ptr_free(&cydstHelper) - _helper_input_void_ptr_free(&cysrcHelper) + try: + if hGraphExec is None: + phGraphExec = 0 + elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)): + phGraphExec = int(hGraphExec) + else: + phGraphExec = int(cudaGraphExec_t(hGraphExec)) + cyhGraphExec = phGraphExec + if node is None: + pnode = 0 + elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)): + pnode = int(node) + else: + pnode = int(cudaGraphNode_t(node)) + cynode = pnode + cydst = _helper_input_void_ptr(dst, &cydstHelper) + cysrc = _helper_input_void_ptr(src, &cysrcHelper) + cykind = int(kind) + with nogil: + err = cyruntime.cudaGraphExecMemcpyNodeSetParams1D(cyhGraphExec, cynode, cydst, cysrc, count, cykind) + finally: + _helper_input_void_ptr_free(&cydstHelper) + _helper_input_void_ptr_free(&cysrcHelper) return (_cudaError_t(err),) {{endif}} @@ -36306,14 +36735,8 @@ def cudaGraphExecMemsetNodeSetParams(hGraphExec, node, pNodeParams : Optional[cu -------- :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddMemsetNode`, :py:obj:`~.cudaGraphMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate` """ + cdef cyruntime.cudaMemsetParams* cypNodeParams_ptr cdef cyruntime.cudaGraphNode_t cynode - if node is None: - pnode = 0 - elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)): - pnode = int(node) - else: - pnode = int(cudaGraphNode_t(node)) - cynode = pnode cdef cyruntime.cudaGraphExec_t cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -36322,7 +36745,14 @@ def cudaGraphExecMemsetNodeSetParams(hGraphExec, node, pNodeParams : Optional[cu else: phGraphExec = int(cudaGraphExec_t(hGraphExec)) cyhGraphExec = phGraphExec - cdef cyruntime.cudaMemsetParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL + if node is None: + pnode = 0 + elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)): + pnode = int(node) + else: + pnode = int(cudaGraphNode_t(node)) + cynode = pnode + cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL with nogil: err = cyruntime.cudaGraphExecMemsetNodeSetParams(cyhGraphExec, cynode, cypNodeParams_ptr) return (_cudaError_t(err),) @@ -36361,14 +36791,8 @@ def cudaGraphExecHostNodeSetParams(hGraphExec, node, pNodeParams : Optional[cuda -------- :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphHostNodeSetParams`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate` """ + cdef cyruntime.cudaHostNodeParams* cypNodeParams_ptr cdef cyruntime.cudaGraphNode_t cynode - if node is None: - pnode = 0 - elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)): - pnode = int(node) - else: - pnode = int(cudaGraphNode_t(node)) - cynode = pnode cdef cyruntime.cudaGraphExec_t cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -36377,7 +36801,14 @@ def cudaGraphExecHostNodeSetParams(hGraphExec, node, pNodeParams : Optional[cuda else: phGraphExec = int(cudaGraphExec_t(hGraphExec)) cyhGraphExec = phGraphExec - cdef cyruntime.cudaHostNodeParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL + if node is None: + pnode = 0 + elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)): + pnode = int(node) + else: + pnode = int(cudaGraphNode_t(node)) + cynode = pnode + cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL with nogil: err = cyruntime.cudaGraphExecHostNodeSetParams(cyhGraphExec, cynode, cypNodeParams_ptr) return (_cudaError_t(err),) @@ -36425,21 +36856,7 @@ def cudaGraphExecChildGraphNodeSetParams(hGraphExec, node, childGraph): :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphChildGraphNodeGetGraph`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate` """ cdef cyruntime.cudaGraph_t cychildGraph - if childGraph is None: - pchildGraph = 0 - elif isinstance(childGraph, (cudaGraph_t,driver.CUgraph)): - pchildGraph = int(childGraph) - else: - pchildGraph = int(cudaGraph_t(childGraph)) - cychildGraph = pchildGraph cdef cyruntime.cudaGraphNode_t cynode - if node is None: - pnode = 0 - elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)): - pnode = int(node) - else: - pnode = int(cudaGraphNode_t(node)) - cynode = pnode cdef cyruntime.cudaGraphExec_t cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -36448,6 +36865,20 @@ def cudaGraphExecChildGraphNodeSetParams(hGraphExec, node, childGraph): else: phGraphExec = int(cudaGraphExec_t(hGraphExec)) cyhGraphExec = phGraphExec + if node is None: + pnode = 0 + elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)): + pnode = int(node) + else: + pnode = int(cudaGraphNode_t(node)) + cynode = pnode + if childGraph is None: + pchildGraph = 0 + elif isinstance(childGraph, (cudaGraph_t,driver.CUgraph)): + pchildGraph = int(childGraph) + else: + pchildGraph = int(cudaGraph_t(childGraph)) + cychildGraph = pchildGraph with nogil: err = cyruntime.cudaGraphExecChildGraphNodeSetParams(cyhGraphExec, cynode, cychildGraph) return (_cudaError_t(err),) @@ -36488,21 +36919,7 @@ def cudaGraphExecEventRecordNodeSetEvent(hGraphExec, hNode, event): :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphEventRecordNodeGetEvent`, :py:obj:`~.cudaGraphEventWaitNodeSetEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate` """ cdef cyruntime.cudaEvent_t cyevent - if event is None: - pevent = 0 - elif isinstance(event, (cudaEvent_t,driver.CUevent)): - pevent = int(event) - else: - pevent = int(cudaEvent_t(event)) - cyevent = pevent cdef cyruntime.cudaGraphNode_t cyhNode - if hNode is None: - phNode = 0 - elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)): - phNode = int(hNode) - else: - phNode = int(cudaGraphNode_t(hNode)) - cyhNode = phNode cdef cyruntime.cudaGraphExec_t cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -36511,6 +36928,20 @@ def cudaGraphExecEventRecordNodeSetEvent(hGraphExec, hNode, event): else: phGraphExec = int(cudaGraphExec_t(hGraphExec)) cyhGraphExec = phGraphExec + if hNode is None: + phNode = 0 + elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)): + phNode = int(hNode) + else: + phNode = int(cudaGraphNode_t(hNode)) + cyhNode = phNode + if event is None: + pevent = 0 + elif isinstance(event, (cudaEvent_t,driver.CUevent)): + pevent = int(event) + else: + pevent = int(cudaEvent_t(event)) + cyevent = pevent with nogil: err = cyruntime.cudaGraphExecEventRecordNodeSetEvent(cyhGraphExec, cyhNode, cyevent) return (_cudaError_t(err),) @@ -36551,21 +36982,7 @@ def cudaGraphExecEventWaitNodeSetEvent(hGraphExec, hNode, event): :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphEventWaitNodeGetEvent`, :py:obj:`~.cudaGraphEventRecordNodeSetEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate` """ cdef cyruntime.cudaEvent_t cyevent - if event is None: - pevent = 0 - elif isinstance(event, (cudaEvent_t,driver.CUevent)): - pevent = int(event) - else: - pevent = int(cudaEvent_t(event)) - cyevent = pevent cdef cyruntime.cudaGraphNode_t cyhNode - if hNode is None: - phNode = 0 - elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)): - phNode = int(hNode) - else: - phNode = int(cudaGraphNode_t(hNode)) - cyhNode = phNode cdef cyruntime.cudaGraphExec_t cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -36574,6 +36991,20 @@ def cudaGraphExecEventWaitNodeSetEvent(hGraphExec, hNode, event): else: phGraphExec = int(cudaGraphExec_t(hGraphExec)) cyhGraphExec = phGraphExec + if hNode is None: + phNode = 0 + elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)): + phNode = int(hNode) + else: + phNode = int(cudaGraphNode_t(hNode)) + cyhNode = phNode + if event is None: + pevent = 0 + elif isinstance(event, (cudaEvent_t,driver.CUevent)): + pevent = int(event) + else: + pevent = int(cudaEvent_t(event)) + cyevent = pevent with nogil: err = cyruntime.cudaGraphExecEventWaitNodeSetEvent(cyhGraphExec, cyhNode, cyevent) return (_cudaError_t(err),) @@ -36617,14 +37048,8 @@ def cudaGraphExecExternalSemaphoresSignalNodeSetParams(hGraphExec, hNode, nodePa -------- :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate` """ + cdef cyruntime.cudaExternalSemaphoreSignalNodeParams* cynodeParams_ptr cdef cyruntime.cudaGraphNode_t cyhNode - if hNode is None: - phNode = 0 - elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)): - phNode = int(hNode) - else: - phNode = int(cudaGraphNode_t(hNode)) - cyhNode = phNode cdef cyruntime.cudaGraphExec_t cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -36633,7 +37058,14 @@ def cudaGraphExecExternalSemaphoresSignalNodeSetParams(hGraphExec, hNode, nodePa else: phGraphExec = int(cudaGraphExec_t(hGraphExec)) cyhGraphExec = phGraphExec - cdef cyruntime.cudaExternalSemaphoreSignalNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + if hNode is None: + phNode = 0 + elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)): + phNode = int(hNode) + else: + phNode = int(cudaGraphNode_t(hNode)) + cyhNode = phNode + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL with nogil: err = cyruntime.cudaGraphExecExternalSemaphoresSignalNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr) return (_cudaError_t(err),) @@ -36677,14 +37109,8 @@ def cudaGraphExecExternalSemaphoresWaitNodeSetParams(hGraphExec, hNode, nodePara -------- :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate` """ + cdef cyruntime.cudaExternalSemaphoreWaitNodeParams* cynodeParams_ptr cdef cyruntime.cudaGraphNode_t cyhNode - if hNode is None: - phNode = 0 - elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)): - phNode = int(hNode) - else: - phNode = int(cudaGraphNode_t(hNode)) - cyhNode = phNode cdef cyruntime.cudaGraphExec_t cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -36693,7 +37119,14 @@ def cudaGraphExecExternalSemaphoresWaitNodeSetParams(hGraphExec, hNode, nodePara else: phGraphExec = int(cudaGraphExec_t(hGraphExec)) cyhGraphExec = phGraphExec - cdef cyruntime.cudaExternalSemaphoreWaitNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + if hNode is None: + phNode = 0 + elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)): + phNode = int(hNode) + else: + phNode = int(cudaGraphNode_t(hNode)) + cyhNode = phNode + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL with nogil: err = cyruntime.cudaGraphExecExternalSemaphoresWaitNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr) return (_cudaError_t(err),) @@ -36742,13 +37175,6 @@ def cudaGraphNodeSetEnabled(hGraphExec, hNode, unsigned int isEnabled): Currently only kernel, memset and memcpy nodes are supported. """ cdef cyruntime.cudaGraphNode_t cyhNode - if hNode is None: - phNode = 0 - elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)): - phNode = int(hNode) - else: - phNode = int(cudaGraphNode_t(hNode)) - cyhNode = phNode cdef cyruntime.cudaGraphExec_t cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -36757,6 +37183,13 @@ def cudaGraphNodeSetEnabled(hGraphExec, hNode, unsigned int isEnabled): else: phGraphExec = int(cudaGraphExec_t(hGraphExec)) cyhGraphExec = phGraphExec + if hNode is None: + phNode = 0 + elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)): + phNode = int(hNode) + else: + phNode = int(cudaGraphNode_t(hNode)) + cyhNode = phNode with nogil: err = cyruntime.cudaGraphNodeSetEnabled(cyhGraphExec, cyhNode, isEnabled) return (_cudaError_t(err),) @@ -36797,14 +37230,8 @@ def cudaGraphNodeGetEnabled(hGraphExec, hNode): ----- Currently only kernel, memset and memcpy nodes are supported. """ + cdef unsigned int isEnabled = 0 cdef cyruntime.cudaGraphNode_t cyhNode - if hNode is None: - phNode = 0 - elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)): - phNode = int(hNode) - else: - phNode = int(cudaGraphNode_t(hNode)) - cyhNode = phNode cdef cyruntime.cudaGraphExec_t cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -36813,7 +37240,13 @@ def cudaGraphNodeGetEnabled(hGraphExec, hNode): else: phGraphExec = int(cudaGraphExec_t(hGraphExec)) cyhGraphExec = phGraphExec - cdef unsigned int isEnabled = 0 + if hNode is None: + phNode = 0 + elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)): + phNode = int(hNode) + else: + phNode = int(cudaGraphNode_t(hNode)) + cyhNode = phNode with nogil: err = cyruntime.cudaGraphNodeGetEnabled(cyhGraphExec, cyhNode, &isEnabled) if err != cyruntime.cudaSuccess: @@ -36973,14 +37406,8 @@ def cudaGraphExecUpdate(hGraphExec, hGraph): -------- :py:obj:`~.cudaGraphInstantiate` """ + cdef cudaGraphExecUpdateResultInfo resultInfo cdef cyruntime.cudaGraph_t cyhGraph - if hGraph is None: - phGraph = 0 - elif isinstance(hGraph, (cudaGraph_t,driver.CUgraph)): - phGraph = int(hGraph) - else: - phGraph = int(cudaGraph_t(hGraph)) - cyhGraph = phGraph cdef cyruntime.cudaGraphExec_t cyhGraphExec if hGraphExec is None: phGraphExec = 0 @@ -36989,7 +37416,14 @@ def cudaGraphExecUpdate(hGraphExec, hGraph): else: phGraphExec = int(cudaGraphExec_t(hGraphExec)) cyhGraphExec = phGraphExec - cdef cudaGraphExecUpdateResultInfo resultInfo = cudaGraphExecUpdateResultInfo() + if hGraph is None: + phGraph = 0 + elif isinstance(hGraph, (cudaGraph_t,driver.CUgraph)): + phGraph = int(hGraph) + else: + phGraph = int(cudaGraph_t(hGraph)) + cyhGraph = phGraph + resultInfo = cudaGraphExecUpdateResultInfo() with nogil: err = cyruntime.cudaGraphExecUpdate(cyhGraphExec, cyhGraph, resultInfo._pvt_ptr) if err != cyruntime.cudaSuccess: @@ -37026,13 +37460,6 @@ def cudaGraphUpload(graphExec, stream): :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphLaunch`, :py:obj:`~.cudaGraphExecDestroy` """ cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream cdef cyruntime.cudaGraphExec_t cygraphExec if graphExec is None: pgraphExec = 0 @@ -37041,6 +37468,13 @@ def cudaGraphUpload(graphExec, stream): else: pgraphExec = int(cudaGraphExec_t(graphExec)) cygraphExec = pgraphExec + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream with nogil: err = cyruntime.cudaGraphUpload(cygraphExec, cystream) return (_cudaError_t(err),) @@ -37080,13 +37514,6 @@ def cudaGraphLaunch(graphExec, stream): :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphUpload`, :py:obj:`~.cudaGraphExecDestroy` """ cdef cyruntime.cudaStream_t cystream - if stream is None: - pstream = 0 - elif isinstance(stream, (cudaStream_t,driver.CUstream)): - pstream = int(stream) - else: - pstream = int(cudaStream_t(stream)) - cystream = pstream cdef cyruntime.cudaGraphExec_t cygraphExec if graphExec is None: pgraphExec = 0 @@ -37095,6 +37522,13 @@ def cudaGraphLaunch(graphExec, stream): else: pgraphExec = int(cudaGraphExec_t(graphExec)) cygraphExec = pgraphExec + if stream is None: + pstream = 0 + elif isinstance(stream, (cudaStream_t,driver.CUstream)): + pstream = int(stream) + else: + pstream = int(cudaStream_t(stream)) + cystream = pstream with nogil: err = cyruntime.cudaGraphLaunch(cygraphExec, cystream) return (_cudaError_t(err),) @@ -37255,19 +37689,23 @@ def cudaUserObjectCreate(ptr, destroy, unsigned int initialRefcount, unsigned in :py:obj:`~.cudaUserObjectRetain`, :py:obj:`~.cudaUserObjectRelease`, :py:obj:`~.cudaGraphRetainUserObject`, :py:obj:`~.cudaGraphReleaseUserObject`, :py:obj:`~.cudaGraphCreate` """ cdef cyruntime.cudaHostFn_t cydestroy - if destroy is None: - pdestroy = 0 - elif isinstance(destroy, (cudaHostFn_t,)): - pdestroy = int(destroy) - else: - pdestroy = int(cudaHostFn_t(destroy)) - cydestroy = pdestroy - cdef cudaUserObject_t object_out = cudaUserObject_t() cdef _HelperInputVoidPtrStruct cyptrHelper - cdef void* cyptr = _helper_input_void_ptr(ptr, &cyptrHelper) - with nogil: - err = cyruntime.cudaUserObjectCreate(object_out._pvt_ptr, cyptr, cydestroy, initialRefcount, flags) - _helper_input_void_ptr_free(&cyptrHelper) + cdef void* cyptr + cdef cudaUserObject_t object_out + try: + object_out = cudaUserObject_t() + cyptr = _helper_input_void_ptr(ptr, &cyptrHelper) + if destroy is None: + pdestroy = 0 + elif isinstance(destroy, (cudaHostFn_t,)): + pdestroy = int(destroy) + else: + pdestroy = int(cudaHostFn_t(destroy)) + cydestroy = pdestroy + with nogil: + err = cyruntime.cudaUserObjectCreate(object_out._pvt_ptr, cyptr, cydestroy, initialRefcount, flags) + finally: + _helper_input_void_ptr_free(&cyptrHelper) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, object_out) @@ -37396,13 +37834,6 @@ def cudaGraphRetainUserObject(graph, object, unsigned int count, unsigned int fl :py:obj:`~.cudaUserObjectCreate` :py:obj:`~.cudaUserObjectRetain`, :py:obj:`~.cudaUserObjectRelease`, :py:obj:`~.cudaGraphReleaseUserObject`, :py:obj:`~.cudaGraphCreate` """ cdef cyruntime.cudaUserObject_t cyobject - if object is None: - pobject = 0 - elif isinstance(object, (cudaUserObject_t,driver.CUuserObject)): - pobject = int(object) - else: - pobject = int(cudaUserObject_t(object)) - cyobject = pobject cdef cyruntime.cudaGraph_t cygraph if graph is None: pgraph = 0 @@ -37411,6 +37842,13 @@ def cudaGraphRetainUserObject(graph, object, unsigned int count, unsigned int fl else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph + if object is None: + pobject = 0 + elif isinstance(object, (cudaUserObject_t,driver.CUuserObject)): + pobject = int(object) + else: + pobject = int(cudaUserObject_t(object)) + cyobject = pobject with nogil: err = cyruntime.cudaGraphRetainUserObject(cygraph, cyobject, count, flags) return (_cudaError_t(err),) @@ -37447,13 +37885,6 @@ def cudaGraphReleaseUserObject(graph, object, unsigned int count): :py:obj:`~.cudaUserObjectCreate` :py:obj:`~.cudaUserObjectRetain`, :py:obj:`~.cudaUserObjectRelease`, :py:obj:`~.cudaGraphRetainUserObject`, :py:obj:`~.cudaGraphCreate` """ cdef cyruntime.cudaUserObject_t cyobject - if object is None: - pobject = 0 - elif isinstance(object, (cudaUserObject_t,driver.CUuserObject)): - pobject = int(object) - else: - pobject = int(cudaUserObject_t(object)) - cyobject = pobject cdef cyruntime.cudaGraph_t cygraph if graph is None: pgraph = 0 @@ -37462,6 +37893,13 @@ def cudaGraphReleaseUserObject(graph, object, unsigned int count): else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph + if object is None: + pobject = 0 + elif isinstance(object, (cudaUserObject_t,driver.CUuserObject)): + pobject = int(object) + else: + pobject = int(cudaUserObject_t(object)) + cyobject = pobject with nogil: err = cyruntime.cudaGraphReleaseUserObject(cygraph, cyobject, count) return (_cudaError_t(err),) @@ -37517,47 +37955,51 @@ def cudaGraphAddNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | li -------- :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaGraphExecNodeSetParams` """ - dependencyData = [] if dependencyData is None else dependencyData - if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in dependencyData): - raise TypeError("Argument 'dependencyData' is not instance of type (expected tuple[cyruntime.cudaGraphEdgeData,] or list[cyruntime.cudaGraphEdgeData,]") - pDependencies = [] if pDependencies is None else pDependencies - if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): - raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") - cdef cyruntime.cudaGraph_t cygraph - if graph is None: - pgraph = 0 - elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): - pgraph = int(graph) - else: - pgraph = int(cudaGraph_t(graph)) - cygraph = pgraph - cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t() - cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL - if len(pDependencies) > 1: - cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) - if cypDependencies is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) - else: - for idx in range(len(pDependencies)): - cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] - elif len(pDependencies) == 1: - cypDependencies = (pDependencies[0])._pvt_ptr + cdef cyruntime.cudaGraphNodeParams* cynodeParams_ptr cdef cyruntime.cudaGraphEdgeData* cydependencyData = NULL - if len(dependencyData) > 1: - cydependencyData = calloc(len(dependencyData), sizeof(cyruntime.cudaGraphEdgeData)) - if cydependencyData is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData))) - for idx in range(len(dependencyData)): - string.memcpy(&cydependencyData[idx], (dependencyData[idx])._pvt_ptr, sizeof(cyruntime.cudaGraphEdgeData)) - elif len(dependencyData) == 1: - cydependencyData = (dependencyData[0])._pvt_ptr - cdef cyruntime.cudaGraphNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL - with nogil: - err = cyruntime.cudaGraphAddNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, cydependencyData, numDependencies, cynodeParams_ptr) - if len(pDependencies) > 1 and cypDependencies is not NULL: - free(cypDependencies) - if len(dependencyData) > 1 and cydependencyData is not NULL: - free(cydependencyData) + cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL + cdef cyruntime.cudaGraph_t cygraph + cdef cudaGraphNode_t pGraphNode + try: + pGraphNode = cudaGraphNode_t() + if graph is None: + pgraph = 0 + elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): + pgraph = int(graph) + else: + pgraph = int(cudaGraph_t(graph)) + cygraph = pgraph + pDependencies = [] if pDependencies is None else pDependencies + if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): + raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") + if len(pDependencies) > 1: + cypDependencies = calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t)) + if cypDependencies is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t))) + else: + for idx in range(len(pDependencies)): + cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] + elif len(pDependencies) == 1: + cypDependencies = (pDependencies[0])._pvt_ptr + dependencyData = [] if dependencyData is None else dependencyData + if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in dependencyData): + raise TypeError("Argument 'dependencyData' is not instance of type (expected tuple[cyruntime.cudaGraphEdgeData,] or list[cyruntime.cudaGraphEdgeData,]") + if len(dependencyData) > 1: + cydependencyData = calloc(len(dependencyData), sizeof(cyruntime.cudaGraphEdgeData)) + if cydependencyData is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData))) + for idx in range(len(dependencyData)): + string.memcpy(&cydependencyData[idx], (dependencyData[idx])._pvt_ptr, sizeof(cyruntime.cudaGraphEdgeData)) + elif len(dependencyData) == 1: + cydependencyData = (dependencyData[0])._pvt_ptr + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + with nogil: + err = cyruntime.cudaGraphAddNode(pGraphNode._pvt_ptr, cygraph, cypDependencies, cydependencyData, numDependencies, cynodeParams_ptr) + finally: + if len(pDependencies) > 1 and cypDependencies is not NULL: + free(cypDependencies) + if len(dependencyData) > 1 and cydependencyData is not NULL: + free(cydependencyData) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, pGraphNode) @@ -37593,6 +38035,7 @@ def cudaGraphNodeSetParams(node, nodeParams : Optional[cudaGraphNodeParams]): -------- :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphExecNodeSetParams` """ + cdef cyruntime.cudaGraphNodeParams* cynodeParams_ptr cdef cyruntime.cudaGraphNode_t cynode if node is None: pnode = 0 @@ -37601,7 +38044,7 @@ def cudaGraphNodeSetParams(node, nodeParams : Optional[cudaGraphNodeParams]): else: pnode = int(cudaGraphNode_t(node)) cynode = pnode - cdef cyruntime.cudaGraphNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL with nogil: err = cyruntime.cudaGraphNodeSetParams(cynode, cynodeParams_ptr) return (_cudaError_t(err),) @@ -37645,14 +38088,8 @@ def cudaGraphExecNodeSetParams(graphExec, node, nodeParams : Optional[cudaGraphN -------- :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphNodeSetParams` :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate` """ + cdef cyruntime.cudaGraphNodeParams* cynodeParams_ptr cdef cyruntime.cudaGraphNode_t cynode - if node is None: - pnode = 0 - elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)): - pnode = int(node) - else: - pnode = int(cudaGraphNode_t(node)) - cynode = pnode cdef cyruntime.cudaGraphExec_t cygraphExec if graphExec is None: pgraphExec = 0 @@ -37661,7 +38098,14 @@ def cudaGraphExecNodeSetParams(graphExec, node, nodeParams : Optional[cudaGraphN else: pgraphExec = int(cudaGraphExec_t(graphExec)) cygraphExec = pgraphExec - cdef cyruntime.cudaGraphNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL + if node is None: + pnode = 0 + elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)): + pnode = int(node) + else: + pnode = int(cudaGraphNode_t(node)) + cynode = pnode + cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL with nogil: err = cyruntime.cudaGraphExecNodeSetParams(cygraphExec, cynode, cynodeParams_ptr) return (_cudaError_t(err),) @@ -37704,6 +38148,8 @@ def cudaGraphConditionalHandleCreate(graph, unsigned int defaultLaunchValue, uns :py:obj:`~.cuGraphAddNode`, """ cdef cyruntime.cudaGraph_t cygraph + cdef cudaGraphConditionalHandle pHandle_out + pHandle_out = cudaGraphConditionalHandle() if graph is None: pgraph = 0 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): @@ -37711,7 +38157,6 @@ def cudaGraphConditionalHandleCreate(graph, unsigned int defaultLaunchValue, uns else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - cdef cudaGraphConditionalHandle pHandle_out = cudaGraphConditionalHandle() with nogil: err = cyruntime.cudaGraphConditionalHandleCreate(pHandle_out._pvt_ptr, cygraph, defaultLaunchValue, flags) if err != cyruntime.cudaSuccess: @@ -37759,14 +38204,9 @@ def cudaGraphConditionalHandleCreate_v2(graph, ctx, unsigned int defaultLaunchVa :py:obj:`~.cuGraphAddNode`, """ cdef cyruntime.cudaExecutionContext_t cyctx - if ctx is None: - pctx = 0 - elif isinstance(ctx, (cudaExecutionContext_t,)): - pctx = int(ctx) - else: - pctx = int(cudaExecutionContext_t(ctx)) - cyctx = pctx cdef cyruntime.cudaGraph_t cygraph + cdef cudaGraphConditionalHandle pHandle_out + pHandle_out = cudaGraphConditionalHandle() if graph is None: pgraph = 0 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)): @@ -37774,7 +38214,13 @@ def cudaGraphConditionalHandleCreate_v2(graph, ctx, unsigned int defaultLaunchVa else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - cdef cudaGraphConditionalHandle pHandle_out = cudaGraphConditionalHandle() + if ctx is None: + pctx = 0 + elif isinstance(ctx, (cudaExecutionContext_t,)): + pctx = int(ctx) + else: + pctx = int(cudaExecutionContext_t(ctx)) + cyctx = pctx with nogil: err = cyruntime.cudaGraphConditionalHandleCreate_v2(pHandle_out._pvt_ptr, cygraph, cyctx, defaultLaunchValue, flags) if err != cyruntime.cudaSuccess: @@ -37877,8 +38323,8 @@ def cudaGetDriverEntryPoint(char* symbol, unsigned long long flags): ----- This API is deprecated and :py:obj:`~.cudaGetDriverEntryPointByVersion` (with a hardcoded :py:obj:`~.cudaVersion`) should be used instead. """ - cdef void_ptr funcPtr = 0 cdef cyruntime.cudaDriverEntryPointQueryResult driverStatus + cdef void_ptr funcPtr = 0 with nogil: err = cyruntime.cudaGetDriverEntryPoint(symbol, &funcPtr, flags, &driverStatus) if err != cyruntime.cudaSuccess: @@ -37985,8 +38431,8 @@ def cudaGetDriverEntryPointByVersion(char* symbol, unsigned int cudaVersion, uns -------- :py:obj:`~.cuGetProcAddress` """ - cdef void_ptr funcPtr = 0 cdef cyruntime.cudaDriverEntryPointQueryResult driverStatus + cdef void_ptr funcPtr = 0 with nogil: err = cyruntime.cudaGetDriverEntryPointByVersion(symbol, &funcPtr, cudaVersion, flags, &driverStatus) if err != cyruntime.cudaSuccess: @@ -38063,32 +38509,42 @@ def cudaLibraryLoadData(code, jitOptions : Optional[tuple[cudaJitOption] | list[ -------- :py:obj:`~.cudaLibraryLoadFromFile`, :py:obj:`~.cudaLibraryUnload`, :py:obj:`~.cuLibraryLoadData` """ - libraryOptionValues = [] if libraryOptionValues is None else libraryOptionValues - libraryOptions = [] if libraryOptions is None else libraryOptions - if not all(isinstance(_x, (cudaLibraryOption)) for _x in libraryOptions): - raise TypeError("Argument 'libraryOptions' is not instance of type (expected tuple[cyruntime.cudaLibraryOption] or list[cyruntime.cudaLibraryOption]") - jitOptionsValues = [] if jitOptionsValues is None else jitOptionsValues - jitOptions = [] if jitOptions is None else jitOptions - if not all(isinstance(_x, (cudaJitOption)) for _x in jitOptions): - raise TypeError("Argument 'jitOptions' is not instance of type (expected tuple[cyruntime.cudaJitOption] or list[cyruntime.cudaJitOption]") - cdef cudaLibrary_t library = cudaLibrary_t() + cdef _InputVoidPtrPtrHelper voidStarHelperlibraryOptionValues + cdef void** cylibraryOptionValues_ptr + cdef vector[cyruntime.cudaLibraryOption] cylibraryOptions + cdef _InputVoidPtrPtrHelper voidStarHelperjitOptionsValues + cdef void** cyjitOptionsValues_ptr + cdef vector[cyruntime.cudaJitOption] cyjitOptions cdef _HelperInputVoidPtrStruct cycodeHelper - cdef void* cycode = _helper_input_void_ptr(code, &cycodeHelper) - cdef vector[cyruntime.cudaJitOption] cyjitOptions = [int(pyjitOptions) for pyjitOptions in (jitOptions)] - pylist = [_HelperCudaJitOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(jitOptions, jitOptionsValues)] - cdef _InputVoidPtrPtrHelper voidStarHelperjitOptionsValues = _InputVoidPtrPtrHelper(pylist) - cdef void** cyjitOptionsValues_ptr = voidStarHelperjitOptionsValues.cptr - if numJitOptions > len(jitOptions): raise RuntimeError("List is too small: " + str(len(jitOptions)) + " < " + str(numJitOptions)) - if numJitOptions > len(jitOptionsValues): raise RuntimeError("List is too small: " + str(len(jitOptionsValues)) + " < " + str(numJitOptions)) - cdef vector[cyruntime.cudaLibraryOption] cylibraryOptions = [int(pylibraryOptions) for pylibraryOptions in (libraryOptions)] - pylist = [_HelperCudaLibraryOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(libraryOptions, libraryOptionValues)] - cdef _InputVoidPtrPtrHelper voidStarHelperlibraryOptionValues = _InputVoidPtrPtrHelper(pylist) - cdef void** cylibraryOptionValues_ptr = voidStarHelperlibraryOptionValues.cptr - if numLibraryOptions > len(libraryOptions): raise RuntimeError("List is too small: " + str(len(libraryOptions)) + " < " + str(numLibraryOptions)) - if numLibraryOptions > len(libraryOptionValues): raise RuntimeError("List is too small: " + str(len(libraryOptionValues)) + " < " + str(numLibraryOptions)) - with nogil: - err = cyruntime.cudaLibraryLoadData(library._pvt_ptr, cycode, cyjitOptions.data(), cyjitOptionsValues_ptr, numJitOptions, cylibraryOptions.data(), cylibraryOptionValues_ptr, numLibraryOptions) - _helper_input_void_ptr_free(&cycodeHelper) + cdef void* cycode + cdef cudaLibrary_t library + try: + library = cudaLibrary_t() + cycode = _helper_input_void_ptr(code, &cycodeHelper) + jitOptions = [] if jitOptions is None else jitOptions + if not all(isinstance(_x, (cudaJitOption)) for _x in jitOptions): + raise TypeError("Argument 'jitOptions' is not instance of type (expected tuple[cyruntime.cudaJitOption] or list[cyruntime.cudaJitOption]") + cyjitOptions = jitOptions + jitOptionsValues = [] if jitOptionsValues is None else jitOptionsValues + pylist = [_HelperCudaJitOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(jitOptions, jitOptionsValues)] + voidStarHelperjitOptionsValues = _InputVoidPtrPtrHelper(pylist) + cyjitOptionsValues_ptr = voidStarHelperjitOptionsValues.cptr + if numJitOptions > len(jitOptions): raise RuntimeError("List is too small: " + str(len(jitOptions)) + " < " + str(numJitOptions)) + if numJitOptions > len(jitOptionsValues): raise RuntimeError("List is too small: " + str(len(jitOptionsValues)) + " < " + str(numJitOptions)) + libraryOptions = [] if libraryOptions is None else libraryOptions + if not all(isinstance(_x, (cudaLibraryOption)) for _x in libraryOptions): + raise TypeError("Argument 'libraryOptions' is not instance of type (expected tuple[cyruntime.cudaLibraryOption] or list[cyruntime.cudaLibraryOption]") + cylibraryOptions = libraryOptions + libraryOptionValues = [] if libraryOptionValues is None else libraryOptionValues + pylist = [_HelperCudaLibraryOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(libraryOptions, libraryOptionValues)] + voidStarHelperlibraryOptionValues = _InputVoidPtrPtrHelper(pylist) + cylibraryOptionValues_ptr = voidStarHelperlibraryOptionValues.cptr + if numLibraryOptions > len(libraryOptions): raise RuntimeError("List is too small: " + str(len(libraryOptions)) + " < " + str(numLibraryOptions)) + if numLibraryOptions > len(libraryOptionValues): raise RuntimeError("List is too small: " + str(len(libraryOptionValues)) + " < " + str(numLibraryOptions)) + with nogil: + err = cyruntime.cudaLibraryLoadData(library._pvt_ptr, cycode, cyjitOptions.data(), cyjitOptionsValues_ptr, numJitOptions, cylibraryOptions.data(), cylibraryOptionValues_ptr, numLibraryOptions) + finally: + _helper_input_void_ptr_free(&cycodeHelper) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, library) @@ -38164,25 +38620,32 @@ def cudaLibraryLoadFromFile(char* fileName, jitOptions : Optional[tuple[cudaJitO -------- :py:obj:`~.cudaLibraryLoadData`, :py:obj:`~.cudaLibraryUnload`, :py:obj:`~.cuLibraryLoadFromFile` """ - libraryOptionValues = [] if libraryOptionValues is None else libraryOptionValues - libraryOptions = [] if libraryOptions is None else libraryOptions - if not all(isinstance(_x, (cudaLibraryOption)) for _x in libraryOptions): - raise TypeError("Argument 'libraryOptions' is not instance of type (expected tuple[cyruntime.cudaLibraryOption] or list[cyruntime.cudaLibraryOption]") - jitOptionsValues = [] if jitOptionsValues is None else jitOptionsValues + cdef _InputVoidPtrPtrHelper voidStarHelperlibraryOptionValues + cdef void** cylibraryOptionValues_ptr + cdef vector[cyruntime.cudaLibraryOption] cylibraryOptions + cdef _InputVoidPtrPtrHelper voidStarHelperjitOptionsValues + cdef void** cyjitOptionsValues_ptr + cdef vector[cyruntime.cudaJitOption] cyjitOptions + cdef cudaLibrary_t library + library = cudaLibrary_t() jitOptions = [] if jitOptions is None else jitOptions if not all(isinstance(_x, (cudaJitOption)) for _x in jitOptions): raise TypeError("Argument 'jitOptions' is not instance of type (expected tuple[cyruntime.cudaJitOption] or list[cyruntime.cudaJitOption]") - cdef cudaLibrary_t library = cudaLibrary_t() - cdef vector[cyruntime.cudaJitOption] cyjitOptions = [int(pyjitOptions) for pyjitOptions in (jitOptions)] + cyjitOptions = jitOptions + jitOptionsValues = [] if jitOptionsValues is None else jitOptionsValues pylist = [_HelperCudaJitOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(jitOptions, jitOptionsValues)] - cdef _InputVoidPtrPtrHelper voidStarHelperjitOptionsValues = _InputVoidPtrPtrHelper(pylist) - cdef void** cyjitOptionsValues_ptr = voidStarHelperjitOptionsValues.cptr + voidStarHelperjitOptionsValues = _InputVoidPtrPtrHelper(pylist) + cyjitOptionsValues_ptr = voidStarHelperjitOptionsValues.cptr if numJitOptions > len(jitOptions): raise RuntimeError("List is too small: " + str(len(jitOptions)) + " < " + str(numJitOptions)) if numJitOptions > len(jitOptionsValues): raise RuntimeError("List is too small: " + str(len(jitOptionsValues)) + " < " + str(numJitOptions)) - cdef vector[cyruntime.cudaLibraryOption] cylibraryOptions = [int(pylibraryOptions) for pylibraryOptions in (libraryOptions)] + libraryOptions = [] if libraryOptions is None else libraryOptions + if not all(isinstance(_x, (cudaLibraryOption)) for _x in libraryOptions): + raise TypeError("Argument 'libraryOptions' is not instance of type (expected tuple[cyruntime.cudaLibraryOption] or list[cyruntime.cudaLibraryOption]") + cylibraryOptions = libraryOptions + libraryOptionValues = [] if libraryOptionValues is None else libraryOptionValues pylist = [_HelperCudaLibraryOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(libraryOptions, libraryOptionValues)] - cdef _InputVoidPtrPtrHelper voidStarHelperlibraryOptionValues = _InputVoidPtrPtrHelper(pylist) - cdef void** cylibraryOptionValues_ptr = voidStarHelperlibraryOptionValues.cptr + voidStarHelperlibraryOptionValues = _InputVoidPtrPtrHelper(pylist) + cylibraryOptionValues_ptr = voidStarHelperlibraryOptionValues.cptr if numLibraryOptions > len(libraryOptions): raise RuntimeError("List is too small: " + str(len(libraryOptions)) + " < " + str(numLibraryOptions)) if numLibraryOptions > len(libraryOptionValues): raise RuntimeError("List is too small: " + str(len(libraryOptionValues)) + " < " + str(numLibraryOptions)) with nogil: @@ -38256,6 +38719,8 @@ def cudaLibraryGetKernel(library, char* name): :py:obj:`~.cudaLibraryLoadData`, :py:obj:`~.cudaLibraryLoadFromFile`, :py:obj:`~.cudaLibraryUnload`, :py:obj:`~.cuLibraryGetKernel` """ cdef cyruntime.cudaLibrary_t cylibrary + cdef cudaKernel_t pKernel + pKernel = cudaKernel_t() if library is None: plibrary = 0 elif isinstance(library, (cudaLibrary_t,)): @@ -38263,7 +38728,6 @@ def cudaLibraryGetKernel(library, char* name): else: plibrary = int(cudaLibrary_t(library)) cylibrary = plibrary - cdef cudaKernel_t pKernel = cudaKernel_t() with nogil: err = cyruntime.cudaLibraryGetKernel(pKernel._pvt_ptr, cylibrary, name) if err != cyruntime.cudaSuccess: @@ -38307,6 +38771,8 @@ def cudaLibraryGetGlobal(library, char* name): :py:obj:`~.cudaLibraryLoadData`, :py:obj:`~.cudaLibraryLoadFromFile`, :py:obj:`~.cudaLibraryUnload`, :py:obj:`~.cudaLibraryGetManaged`, :py:obj:`~.cuLibraryGetGlobal` """ cdef cyruntime.cudaLibrary_t cylibrary + cdef size_t numbytes = 0 + cdef void_ptr dptr = 0 if library is None: plibrary = 0 elif isinstance(library, (cudaLibrary_t,)): @@ -38314,8 +38780,6 @@ def cudaLibraryGetGlobal(library, char* name): else: plibrary = int(cudaLibrary_t(library)) cylibrary = plibrary - cdef void_ptr dptr = 0 - cdef size_t numbytes = 0 with nogil: err = cyruntime.cudaLibraryGetGlobal(&dptr, &numbytes, cylibrary, name) if err != cyruntime.cudaSuccess: @@ -38361,6 +38825,8 @@ def cudaLibraryGetManaged(library, char* name): :py:obj:`~.cudaLibraryLoadData`, :py:obj:`~.cudaLibraryLoadFromFile`, :py:obj:`~.cudaLibraryUnload`, :py:obj:`~.cudaLibraryGetGlobal`, :py:obj:`~.cuLibraryGetManaged` """ cdef cyruntime.cudaLibrary_t cylibrary + cdef size_t numbytes = 0 + cdef void_ptr dptr = 0 if library is None: plibrary = 0 elif isinstance(library, (cudaLibrary_t,)): @@ -38368,8 +38834,6 @@ def cudaLibraryGetManaged(library, char* name): else: plibrary = int(cudaLibrary_t(library)) cylibrary = plibrary - cdef void_ptr dptr = 0 - cdef size_t numbytes = 0 with nogil: err = cyruntime.cudaLibraryGetManaged(&dptr, &numbytes, cylibrary, name) if err != cyruntime.cudaSuccess: @@ -38408,6 +38872,7 @@ def cudaLibraryGetUnifiedFunction(library, char* symbol): :py:obj:`~.cudaLibraryLoadData`, :py:obj:`~.cudaLibraryLoadFromFile`, :py:obj:`~.cudaLibraryUnload`, :py:obj:`~.cuLibraryGetUnifiedFunction` """ cdef cyruntime.cudaLibrary_t cylibrary + cdef void_ptr fptr = 0 if library is None: plibrary = 0 elif isinstance(library, (cudaLibrary_t,)): @@ -38415,7 +38880,6 @@ def cudaLibraryGetUnifiedFunction(library, char* symbol): else: plibrary = int(cudaLibrary_t(library)) cylibrary = plibrary - cdef void_ptr fptr = 0 with nogil: err = cyruntime.cudaLibraryGetUnifiedFunction(&fptr, cylibrary, symbol) if err != cyruntime.cudaSuccess: @@ -38448,6 +38912,7 @@ def cudaLibraryGetKernelCount(lib): :py:obj:`~.cudaLibraryEnumerateKernels`, :py:obj:`~.cudaLibraryLoadFromFile`, :py:obj:`~.cudaLibraryLoadData`, :py:obj:`~.cuLibraryGetKernelCount` """ cdef cyruntime.cudaLibrary_t cylib + cdef unsigned int count = 0 if lib is None: plib = 0 elif isinstance(lib, (cudaLibrary_t,)): @@ -38455,7 +38920,6 @@ def cudaLibraryGetKernelCount(lib): else: plib = int(cudaLibrary_t(lib)) cylib = plib - cdef unsigned int count = 0 with nogil: err = cyruntime.cudaLibraryGetKernelCount(&count, cylib) if err != cyruntime.cudaSuccess: @@ -38492,25 +38956,27 @@ def cudaLibraryEnumerateKernels(unsigned int numKernels, lib): :py:obj:`~.cudaLibraryGetKernelCount`, :py:obj:`~.cuLibraryEnumerateKernels` """ cdef cyruntime.cudaLibrary_t cylib - if lib is None: - plib = 0 - elif isinstance(lib, (cudaLibrary_t,)): - plib = int(lib) - else: - plib = int(cudaLibrary_t(lib)) - cylib = plib cdef cyruntime.cudaKernel_t* cykernels = NULL pykernels = [] - if numKernels != 0: - cykernels = calloc(numKernels, sizeof(cyruntime.cudaKernel_t)) - if cykernels is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(numKernels) + 'x' + str(sizeof(cyruntime.cudaKernel_t))) - with nogil: - err = cyruntime.cudaLibraryEnumerateKernels(cykernels, numKernels, cylib) - if cudaError_t(err) == cudaError_t(0): - pykernels = [cudaKernel_t(init_value=cykernels[idx]) for idx in range(numKernels)] - if cykernels is not NULL: - free(cykernels) + try: + if numKernels != 0: + cykernels = calloc(numKernels, sizeof(cyruntime.cudaKernel_t)) + if cykernels is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(numKernels) + 'x' + str(sizeof(cyruntime.cudaKernel_t))) + if lib is None: + plib = 0 + elif isinstance(lib, (cudaLibrary_t,)): + plib = int(lib) + else: + plib = int(cudaLibrary_t(lib)) + cylib = plib + with nogil: + err = cyruntime.cudaLibraryEnumerateKernels(cykernels, numKernels, cylib) + finally: + if cudaError_t(err) == cudaError_t(0): + pykernels = [cudaKernel_t(init_value=cykernels[idx]) for idx in range(numKernels)] + if cykernels is not NULL: + free(cykernels) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, pykernels) @@ -38608,6 +39074,7 @@ def cudaKernelSetAttributeForDevice(kernel, attr not None : cudaFuncAttribute, i ----- The API has stricter locking requirements in comparison to its legacy counterpart :py:obj:`~.cudaFuncSetAttribute()` due to device-wide semantics. If multiple threads are trying to set the same attribute on the same device simultaneously, the attribute setting will depend on the interleavings chosen by the OS scheduler and memory consistency. """ + cdef cyruntime.cudaFuncAttribute cyattr cdef cyruntime.cudaKernel_t cykernel if kernel is None: pkernel = 0 @@ -38616,7 +39083,7 @@ def cudaKernelSetAttributeForDevice(kernel, attr not None : cudaFuncAttribute, i else: pkernel = int(cudaKernel_t(kernel)) cykernel = pkernel - cdef cyruntime.cudaFuncAttribute cyattr = int(attr) + cyattr = int(attr) with nogil: err = cyruntime.cudaKernelSetAttributeForDevice(cykernel, cyattr, value, device) return (_cudaError_t(err),) @@ -38652,8 +39119,10 @@ def cudaDeviceGetDevResource(int device, typename not None : cudaDevResourceType -------- :py:obj:`~.cuDeviceGetDevResource`, :py:obj:`~.cudaExecutionCtxGetDevResource`, :py:obj:`~.cudaDevSmResourceSplit`, :py:obj:`~.cudaDevResourceGenerateDesc` """ - cdef cudaDevResource resource = cudaDevResource() - cdef cyruntime.cudaDevResourceType cytypename = int(typename) + cdef cyruntime.cudaDevResourceType cytypename + cdef cudaDevResource resource + resource = cudaDevResource() + cytypename = int(typename) with nogil: err = cyruntime.cudaDeviceGetDevResource(device, resource._pvt_ptr, cytypename) if err != cyruntime.cudaSuccess: @@ -38760,22 +39229,27 @@ def cudaDevSmResourceSplitByCount(unsigned int nbGroups, input_ : Optional[cudaD -------- :py:obj:`~.cuDevSmResourceSplitByCount`, :py:obj:`~.cudaDeviceGetDevResource`, :py:obj:`~.cudaExecutionCtxGetDevResource`, :py:obj:`~.cudaDevResourceGenerateDesc` """ + cdef cudaDevResource remaining + cdef cyruntime.cudaDevResource* cyinput__ptr + cdef unsigned int cynbGroups cdef cyruntime.cudaDevResource* cyresult = NULL pyresult = [cudaDevResource() for idx in range(nbGroups)] - if nbGroups != 0: - cyresult = calloc(nbGroups, sizeof(cyruntime.cudaDevResource)) - if cyresult is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(nbGroups) + 'x' + str(sizeof(cyruntime.cudaDevResource))) - cdef unsigned int cynbGroups = nbGroups - cdef cyruntime.cudaDevResource* cyinput__ptr = input_._pvt_ptr if input_ is not None else NULL - cdef cudaDevResource remaining = cudaDevResource() - with nogil: - err = cyruntime.cudaDevSmResourceSplitByCount(cyresult, &cynbGroups, cyinput__ptr, remaining._pvt_ptr, flags, minCount) - if cudaError_t(err) == cudaError_t(0): - for idx in range(nbGroups): - string.memcpy((pyresult[idx])._pvt_ptr, &cyresult[idx], sizeof(cyruntime.cudaDevResource)) - if cyresult is not NULL: - free(cyresult) + try: + if nbGroups != 0: + cyresult = calloc(nbGroups, sizeof(cyruntime.cudaDevResource)) + if cyresult is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(nbGroups) + 'x' + str(sizeof(cyruntime.cudaDevResource))) + cynbGroups = nbGroups + cyinput__ptr = input_._pvt_ptr if input_ is not None else NULL + remaining = cudaDevResource() + with nogil: + err = cyruntime.cudaDevSmResourceSplitByCount(cyresult, &cynbGroups, cyinput__ptr, remaining._pvt_ptr, flags, minCount) + finally: + if cudaError_t(err) == cudaError_t(0): + for idx in range(nbGroups): + string.memcpy((pyresult[idx])._pvt_ptr, &cyresult[idx], sizeof(cyruntime.cudaDevResource)) + if cyresult is not NULL: + free(cyresult) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None, None, None) return (_cudaError_t_SUCCESS, pyresult, cynbGroups, remaining) @@ -38920,22 +39394,27 @@ def cudaDevSmResourceSplit(unsigned int nbGroups, input_ : Optional[cudaDevResou -------- :py:obj:`~.cuDevSmResourceSplit`, :py:obj:`~.cudaDeviceGetDevResource`, :py:obj:`~.cudaExecutionCtxGetDevResource`, :py:obj:`~.cudaDevResourceGenerateDesc` """ + cdef cyruntime.cudaDevSmResourceGroupParams* cygroupParams_ptr + cdef cudaDevResource remainder + cdef cyruntime.cudaDevResource* cyinput__ptr cdef cyruntime.cudaDevResource* cyresult = NULL pyresult = [cudaDevResource() for idx in range(nbGroups)] - if nbGroups != 0: - cyresult = calloc(nbGroups, sizeof(cyruntime.cudaDevResource)) - if cyresult is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(nbGroups) + 'x' + str(sizeof(cyruntime.cudaDevResource))) - cdef cyruntime.cudaDevResource* cyinput__ptr = input_._pvt_ptr if input_ is not None else NULL - cdef cudaDevResource remainder = cudaDevResource() - cdef cyruntime.cudaDevSmResourceGroupParams* cygroupParams_ptr = groupParams._pvt_ptr if groupParams is not None else NULL - with nogil: - err = cyruntime.cudaDevSmResourceSplit(cyresult, nbGroups, cyinput__ptr, remainder._pvt_ptr, flags, cygroupParams_ptr) - if cudaError_t(err) == cudaError_t(0): - for idx in range(nbGroups): - string.memcpy((pyresult[idx])._pvt_ptr, &cyresult[idx], sizeof(cyruntime.cudaDevResource)) - if cyresult is not NULL: - free(cyresult) + try: + if nbGroups != 0: + cyresult = calloc(nbGroups, sizeof(cyruntime.cudaDevResource)) + if cyresult is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(nbGroups) + 'x' + str(sizeof(cyruntime.cudaDevResource))) + cyinput__ptr = input_._pvt_ptr if input_ is not None else NULL + remainder = cudaDevResource() + cygroupParams_ptr = groupParams._pvt_ptr if groupParams is not None else NULL + with nogil: + err = cyruntime.cudaDevSmResourceSplit(cyresult, nbGroups, cyinput__ptr, remainder._pvt_ptr, flags, cygroupParams_ptr) + finally: + if cudaError_t(err) == cudaError_t(0): + for idx in range(nbGroups): + string.memcpy((pyresult[idx])._pvt_ptr, &cyresult[idx], sizeof(cyruntime.cudaDevResource)) + if cyresult is not NULL: + free(cyresult) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None, None) return (_cudaError_t_SUCCESS, pyresult, remainder) @@ -38986,23 +39465,26 @@ def cudaDevResourceGenerateDesc(resources : Optional[tuple[cudaDevResource] | li -------- :py:obj:`~.cuDevResourceGenerateDesc`, :py:obj:`~.cudaDeviceGetDevResource`, :py:obj:`~.cudaExecutionCtxGetDevResource`, :py:obj:`~.cudaDevSmResourceSplit`, :py:obj:`~.cudaGreenCtxCreate` """ - resources = [] if resources is None else resources - if not all(isinstance(_x, (cudaDevResource,)) for _x in resources): - raise TypeError("Argument 'resources' is not instance of type (expected tuple[cyruntime.cudaDevResource,] or list[cyruntime.cudaDevResource,]") - cdef cudaDevResourceDesc_t phDesc = cudaDevResourceDesc_t() cdef cyruntime.cudaDevResource* cyresources = NULL - if len(resources) > 1: - cyresources = calloc(len(resources), sizeof(cyruntime.cudaDevResource)) - if cyresources is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(len(resources)) + 'x' + str(sizeof(cyruntime.cudaDevResource))) - for idx in range(len(resources)): - string.memcpy(&cyresources[idx], (resources[idx])._pvt_ptr, sizeof(cyruntime.cudaDevResource)) - elif len(resources) == 1: - cyresources = (resources[0])._pvt_ptr - with nogil: - err = cyruntime.cudaDevResourceGenerateDesc(phDesc._pvt_ptr, cyresources, nbResources) - if len(resources) > 1 and cyresources is not NULL: - free(cyresources) + cdef cudaDevResourceDesc_t phDesc + try: + phDesc = cudaDevResourceDesc_t() + resources = [] if resources is None else resources + if not all(isinstance(_x, (cudaDevResource,)) for _x in resources): + raise TypeError("Argument 'resources' is not instance of type (expected tuple[cyruntime.cudaDevResource,] or list[cyruntime.cudaDevResource,]") + if len(resources) > 1: + cyresources = calloc(len(resources), sizeof(cyruntime.cudaDevResource)) + if cyresources is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(len(resources)) + 'x' + str(sizeof(cyruntime.cudaDevResource))) + for idx in range(len(resources)): + string.memcpy(&cyresources[idx], (resources[idx])._pvt_ptr, sizeof(cyruntime.cudaDevResource)) + elif len(resources) == 1: + cyresources = (resources[0])._pvt_ptr + with nogil: + err = cyruntime.cudaDevResourceGenerateDesc(phDesc._pvt_ptr, cyresources, nbResources) + finally: + if len(resources) > 1 and cyresources is not NULL: + free(cyresources) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, phDesc) @@ -39056,6 +39538,8 @@ def cudaGreenCtxCreate(desc, int device, unsigned int flags): :py:obj:`~.cudaDeviceGetDevResource`, :py:obj:`~.cudaDevSmResourceSplit`, :py:obj:`~.cudaDevResourceGenerateDesc`, :py:obj:`~.cudaExecutionCtxGetDevResource`, :py:obj:`~.cudaExecutionCtxDestroy`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cudaExecutionCtxStreamCreate` """ cdef cyruntime.cudaDevResourceDesc_t cydesc + cdef cudaExecutionContext_t phCtx + phCtx = cudaExecutionContext_t() if desc is None: pdesc = 0 elif isinstance(desc, (cudaDevResourceDesc_t,)): @@ -39063,7 +39547,6 @@ def cudaGreenCtxCreate(desc, int device, unsigned int flags): else: pdesc = int(cudaDevResourceDesc_t(desc)) cydesc = pdesc - cdef cudaExecutionContext_t phCtx = cudaExecutionContext_t() with nogil: err = cyruntime.cudaGreenCtxCreate(phCtx._pvt_ptr, cydesc, device, flags) if err != cyruntime.cudaSuccess: @@ -39159,6 +39642,8 @@ def cudaExecutionCtxGetDevResource(ctx, typename not None : cudaDevResourceType) -------- :py:obj:`~.cudaDeviceGetDevResource`, :py:obj:`~.cudaDevSmResourceSplit`, :py:obj:`~.cudaDevResourceGenerateDesc`, :py:obj:`~.cudaGreenCtxCreate` """ + cdef cyruntime.cudaDevResourceType cytypename + cdef cudaDevResource resource cdef cyruntime.cudaExecutionContext_t cyctx if ctx is None: pctx = 0 @@ -39167,8 +39652,8 @@ def cudaExecutionCtxGetDevResource(ctx, typename not None : cudaDevResourceType) else: pctx = int(cudaExecutionContext_t(ctx)) cyctx = pctx - cdef cudaDevResource resource = cudaDevResource() - cdef cyruntime.cudaDevResourceType cytypename = int(typename) + resource = cudaDevResource() + cytypename = int(typename) with nogil: err = cyruntime.cudaExecutionCtxGetDevResource(cyctx, resource._pvt_ptr, cytypename) if err != cyruntime.cudaSuccess: @@ -39203,6 +39688,7 @@ def cudaExecutionCtxGetDevice(ctx): :py:obj:`~.cudaGreenCtxCreate`, :py:obj:`~.cudaExecutionCtxDestroy`, :py:obj:`~.cuCtxGetDevice` """ cdef cyruntime.cudaExecutionContext_t cyctx + cdef int device = 0 if ctx is None: pctx = 0 elif isinstance(ctx, (cudaExecutionContext_t,)): @@ -39210,7 +39696,6 @@ def cudaExecutionCtxGetDevice(ctx): else: pctx = int(cudaExecutionContext_t(ctx)) cyctx = pctx - cdef int device = 0 with nogil: err = cyruntime.cudaExecutionCtxGetDevice(&device, cyctx) if err != cyruntime.cudaSuccess: @@ -39245,6 +39730,7 @@ def cudaExecutionCtxGetId(ctx): -------- :py:obj:`~.cudaGreenCtxCreate`, :py:obj:`~.cudaExecutionCtxDestroy`, :py:obj:`~.cudaExecutionCtxGetDevice`, :py:obj:`~.cuCtxGetId` """ + cdef unsigned long long ctxId = 0 cdef cyruntime.cudaExecutionContext_t cyctx if ctx is None: pctx = 0 @@ -39253,7 +39739,6 @@ def cudaExecutionCtxGetId(ctx): else: pctx = int(cudaExecutionContext_t(ctx)) cyctx = pctx - cdef unsigned long long ctxId = 0 with nogil: err = cyruntime.cudaExecutionCtxGetId(cyctx, &ctxId) if err != cyruntime.cudaSuccess: @@ -39321,6 +39806,8 @@ def cudaExecutionCtxStreamCreate(ctx, unsigned int flags, int priority): In the current implementation, only compute kernels launched in priority streams are affected by the stream's priority. Stream priorities have no effect on host-to-device and device-to-host memory operations. """ cdef cyruntime.cudaExecutionContext_t cyctx + cdef cudaStream_t phStream + phStream = cudaStream_t() if ctx is None: pctx = 0 elif isinstance(ctx, (cudaExecutionContext_t,)): @@ -39328,7 +39815,6 @@ def cudaExecutionCtxStreamCreate(ctx, unsigned int flags, int priority): else: pctx = int(cudaExecutionContext_t(ctx)) cyctx = pctx - cdef cudaStream_t phStream = cudaStream_t() with nogil: err = cyruntime.cudaExecutionCtxStreamCreate(phStream._pvt_ptr, cyctx, flags, priority) if err != cyruntime.cudaSuccess: @@ -39409,6 +39895,8 @@ def cudaStreamGetDevResource(hStream, typename not None : cudaDevResourceType): -------- :py:obj:`~.cudaGreenCtxCreate`, :py:obj:`~.cudaExecutionCtxStreamCreate`, :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaDevSmResourceSplit`, :py:obj:`~.cudaDevResourceGenerateDesc`, :py:obj:`~.cuStreamGetDevResource` """ + cdef cyruntime.cudaDevResourceType cytypename + cdef cudaDevResource resource cdef cyruntime.cudaStream_t cyhStream if hStream is None: phStream = 0 @@ -39417,8 +39905,8 @@ def cudaStreamGetDevResource(hStream, typename not None : cudaDevResourceType): else: phStream = int(cudaStream_t(hStream)) cyhStream = phStream - cdef cudaDevResource resource = cudaDevResource() - cdef cyruntime.cudaDevResourceType cytypename = int(typename) + resource = cudaDevResource() + cytypename = int(typename) with nogil: err = cyruntime.cudaStreamGetDevResource(cyhStream, resource._pvt_ptr, cytypename) if err != cyruntime.cudaSuccess: @@ -39465,13 +39953,6 @@ def cudaExecutionCtxRecordEvent(ctx, event): The API will return :py:obj:`~.cudaErrorStreamCaptureUnsupported` if the specified execution context `ctx` has a stream in the capture mode. In such a case, the call will invalidate all the conflicting captures. """ cdef cyruntime.cudaEvent_t cyevent - if event is None: - pevent = 0 - elif isinstance(event, (cudaEvent_t,driver.CUevent)): - pevent = int(event) - else: - pevent = int(cudaEvent_t(event)) - cyevent = pevent cdef cyruntime.cudaExecutionContext_t cyctx if ctx is None: pctx = 0 @@ -39480,6 +39961,13 @@ def cudaExecutionCtxRecordEvent(ctx, event): else: pctx = int(cudaExecutionContext_t(ctx)) cyctx = pctx + if event is None: + pevent = 0 + elif isinstance(event, (cudaEvent_t,driver.CUevent)): + pevent = int(event) + else: + pevent = int(cudaEvent_t(event)) + cyevent = pevent with nogil: err = cyruntime.cudaExecutionCtxRecordEvent(cyctx, cyevent) return (_cudaError_t(err),) @@ -39523,13 +40011,6 @@ def cudaExecutionCtxWaitEvent(ctx, event): The API will return :py:obj:`~.cudaErrorStreamCaptureUnsupported` and invalidate the capture if the specified event `event` is part of an ongoing capture sequence or if the specified execution context `ctx` has a stream in the capture mode. """ cdef cyruntime.cudaEvent_t cyevent - if event is None: - pevent = 0 - elif isinstance(event, (cudaEvent_t,driver.CUevent)): - pevent = int(event) - else: - pevent = int(cudaEvent_t(event)) - cyevent = pevent cdef cyruntime.cudaExecutionContext_t cyctx if ctx is None: pctx = 0 @@ -39538,6 +40019,13 @@ def cudaExecutionCtxWaitEvent(ctx, event): else: pctx = int(cudaExecutionContext_t(ctx)) cyctx = pctx + if event is None: + pevent = 0 + elif isinstance(event, (cudaEvent_t,driver.CUevent)): + pevent = int(event) + else: + pevent = int(cudaEvent_t(event)) + cyevent = pevent with nogil: err = cyruntime.cudaExecutionCtxWaitEvent(cyctx, cyevent) return (_cudaError_t(err),) @@ -39574,7 +40062,8 @@ def cudaDeviceGetExecutionCtx(int device): -------- :py:obj:`~.cudaExecutionCtxGetDevice`, :py:obj:`~.cudaExecutionCtxGetId` """ - cdef cudaExecutionContext_t ctx = cudaExecutionContext_t() + cdef cudaExecutionContext_t ctx + ctx = cudaExecutionContext_t() with nogil: err = cyruntime.cudaDeviceGetExecutionCtx(ctx._pvt_ptr, device) if err != cyruntime.cudaSuccess: @@ -39587,8 +40076,9 @@ def cudaDeviceGetExecutionCtx(int device): @cython.embedsignature(True) def cudaGetExportTable(pExportTableId : Optional[cudaUUID_t]): """""" + cdef cyruntime.cudaUUID_t* cypExportTableId_ptr cdef void_ptr ppExportTable = 0 - cdef cyruntime.cudaUUID_t* cypExportTableId_ptr = pExportTableId._pvt_ptr if pExportTableId is not None else NULL + cypExportTableId_ptr = pExportTableId._pvt_ptr if pExportTableId is not None else NULL with nogil: err = cyruntime.cudaGetExportTable(&ppExportTable, cypExportTableId_ptr) if err != cyruntime.cudaSuccess: @@ -39630,12 +40120,16 @@ def cudaGetKernel(entryFuncAddr): -------- cudaGetKernel (C++ API) """ - cdef cudaKernel_t kernelPtr = cudaKernel_t() cdef _HelperInputVoidPtrStruct cyentryFuncAddrHelper - cdef void* cyentryFuncAddr = _helper_input_void_ptr(entryFuncAddr, &cyentryFuncAddrHelper) - with nogil: - err = cyruntime.cudaGetKernel(kernelPtr._pvt_ptr, cyentryFuncAddr) - _helper_input_void_ptr_free(&cyentryFuncAddrHelper) + cdef void* cyentryFuncAddr + cdef cudaKernel_t kernelPtr + try: + kernelPtr = cudaKernel_t() + cyentryFuncAddr = _helper_input_void_ptr(entryFuncAddr, &cyentryFuncAddrHelper) + with nogil: + err = cyruntime.cudaGetKernel(kernelPtr._pvt_ptr, cyentryFuncAddr) + finally: + _helper_input_void_ptr_free(&cyentryFuncAddrHelper) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None) return (_cudaError_t_SUCCESS, kernelPtr) @@ -39673,12 +40167,16 @@ def make_cudaPitchedPtr(d, size_t p, size_t xsz, size_t ysz): make_cudaExtent, make_cudaPos """ cdef _HelperInputVoidPtrStruct cydHelper - cdef void* cyd = _helper_input_void_ptr(d, &cydHelper) - with nogil: - err = cyruntime.make_cudaPitchedPtr(cyd, p, xsz, ysz) - _helper_input_void_ptr_free(&cydHelper) - cdef cudaPitchedPtr wrapper = cudaPitchedPtr() - wrapper._pvt_ptr[0] = err + cdef void* cyd + cdef cudaPitchedPtr wrapper + try: + cyd = _helper_input_void_ptr(d, &cydHelper) + with nogil: + err = cyruntime.make_cudaPitchedPtr(cyd, p, xsz, ysz) + finally: + _helper_input_void_ptr_free(&cydHelper) + wrapper = cudaPitchedPtr() + wrapper._pvt_ptr[0] = err return wrapper {{endif}} @@ -39711,10 +40209,13 @@ def make_cudaPos(size_t x, size_t y, size_t z): -------- make_cudaExtent, make_cudaPitchedPtr """ - with nogil: - err = cyruntime.make_cudaPos(x, y, z) - cdef cudaPos wrapper = cudaPos() - wrapper._pvt_ptr[0] = err + cdef cudaPos wrapper + try: + with nogil: + err = cyruntime.make_cudaPos(x, y, z) + finally: + wrapper = cudaPos() + wrapper._pvt_ptr[0] = err return wrapper {{endif}} @@ -39748,10 +40249,13 @@ def make_cudaExtent(size_t w, size_t h, size_t d): -------- make_cudaPitchedPtr, make_cudaPos """ - with nogil: - err = cyruntime.make_cudaExtent(w, h, d) - cdef cudaExtent wrapper = cudaExtent() - wrapper._pvt_ptr[0] = err + cdef cudaExtent wrapper + try: + with nogil: + err = cyruntime.make_cudaExtent(w, h, d) + finally: + wrapper = cudaExtent() + wrapper._pvt_ptr[0] = err return wrapper {{endif}} @@ -39816,6 +40320,8 @@ def cudaGraphicsEGLRegisterImage(image, unsigned int flags): :py:obj:`~.cudaGraphicsUnregisterResource`, :py:obj:`~.cudaGraphicsResourceGetMappedEglFrame`, :py:obj:`~.cuGraphicsEGLRegisterImage` """ cdef cyruntime.EGLImageKHR cyimage + cdef cudaGraphicsResource_t pCudaResource + pCudaResource = cudaGraphicsResource_t() if image is None: pimage = 0 elif isinstance(image, (EGLImageKHR,)): @@ -39823,7 +40329,6 @@ def cudaGraphicsEGLRegisterImage(image, unsigned int flags): else: pimage = int(EGLImageKHR(image)) cyimage = pimage - cdef cudaGraphicsResource_t pCudaResource = cudaGraphicsResource_t() with nogil: err = cyruntime.cudaGraphicsEGLRegisterImage(pCudaResource._pvt_ptr, cyimage, flags) if err != cyruntime.cudaSuccess: @@ -39859,6 +40364,8 @@ def cudaEGLStreamConsumerConnect(eglStream): :py:obj:`~.cudaEGLStreamConsumerDisconnect`, :py:obj:`~.cudaEGLStreamConsumerAcquireFrame`, :py:obj:`~.cudaEGLStreamConsumerReleaseFrame`, :py:obj:`~.cuEGLStreamConsumerConnect` """ cdef cyruntime.EGLStreamKHR cyeglStream + cdef cudaEglStreamConnection conn + conn = cudaEglStreamConnection() if eglStream is None: peglStream = 0 elif isinstance(eglStream, (EGLStreamKHR,)): @@ -39866,7 +40373,6 @@ def cudaEGLStreamConsumerConnect(eglStream): else: peglStream = int(EGLStreamKHR(eglStream)) cyeglStream = peglStream - cdef cudaEglStreamConnection conn = cudaEglStreamConnection() with nogil: err = cyruntime.cudaEGLStreamConsumerConnect(conn._pvt_ptr, cyeglStream) if err != cyruntime.cudaSuccess: @@ -39906,6 +40412,8 @@ def cudaEGLStreamConsumerConnectWithFlags(eglStream, unsigned int flags): :py:obj:`~.cudaEGLStreamConsumerDisconnect`, :py:obj:`~.cudaEGLStreamConsumerAcquireFrame`, :py:obj:`~.cudaEGLStreamConsumerReleaseFrame`, :py:obj:`~.cuEGLStreamConsumerConnectWithFlags` """ cdef cyruntime.EGLStreamKHR cyeglStream + cdef cudaEglStreamConnection conn + conn = cudaEglStreamConnection() if eglStream is None: peglStream = 0 elif isinstance(eglStream, (EGLStreamKHR,)): @@ -39913,7 +40421,6 @@ def cudaEGLStreamConsumerConnectWithFlags(eglStream, unsigned int flags): else: peglStream = int(EGLStreamKHR(eglStream)) cyeglStream = peglStream - cdef cudaEglStreamConnection conn = cudaEglStreamConnection() with nogil: err = cyruntime.cudaEGLStreamConsumerConnectWithFlags(conn._pvt_ptr, cyeglStream, flags) if err != cyruntime.cudaSuccess: @@ -39990,25 +40497,7 @@ def cudaEGLStreamConsumerAcquireFrame(conn, pCudaResource, pStream, unsigned int :py:obj:`~.cudaEGLStreamConsumerConnect`, :py:obj:`~.cudaEGLStreamConsumerDisconnect`, :py:obj:`~.cudaEGLStreamConsumerReleaseFrame`, :py:obj:`~.cuEGLStreamConsumerAcquireFrame` """ cdef cyruntime.cudaStream_t *cypStream - if pStream is None: - cypStream = NULL - elif isinstance(pStream, (cudaStream_t,driver.CUstream)): - ppStream = pStream.getPtr() - cypStream = ppStream - elif isinstance(pStream, (int)): - cypStream = pStream - else: - raise TypeError("Argument 'pStream' is not instance of type (expected , found " + str(type(pStream))) cdef cyruntime.cudaGraphicsResource_t *cypCudaResource - if pCudaResource is None: - cypCudaResource = NULL - elif isinstance(pCudaResource, (cudaGraphicsResource_t,)): - ppCudaResource = pCudaResource.getPtr() - cypCudaResource = ppCudaResource - elif isinstance(pCudaResource, (int)): - cypCudaResource = pCudaResource - else: - raise TypeError("Argument 'pCudaResource' is not instance of type (expected , found " + str(type(pCudaResource))) cdef cyruntime.cudaEglStreamConnection *cyconn if conn is None: cyconn = NULL @@ -40019,6 +40508,24 @@ def cudaEGLStreamConsumerAcquireFrame(conn, pCudaResource, pStream, unsigned int cyconn = conn else: raise TypeError("Argument 'conn' is not instance of type (expected , found " + str(type(conn))) + if pCudaResource is None: + cypCudaResource = NULL + elif isinstance(pCudaResource, (cudaGraphicsResource_t,)): + ppCudaResource = pCudaResource.getPtr() + cypCudaResource = ppCudaResource + elif isinstance(pCudaResource, (int)): + cypCudaResource = pCudaResource + else: + raise TypeError("Argument 'pCudaResource' is not instance of type (expected , found " + str(type(pCudaResource))) + if pStream is None: + cypStream = NULL + elif isinstance(pStream, (cudaStream_t,driver.CUstream)): + ppStream = pStream.getPtr() + cypStream = ppStream + elif isinstance(pStream, (int)): + cypStream = pStream + else: + raise TypeError("Argument 'pStream' is not instance of type (expected , found " + str(type(pStream))) with nogil: err = cyruntime.cudaEGLStreamConsumerAcquireFrame(cyconn, cypCudaResource, cypStream, timeout) return (_cudaError_t(err),) @@ -40052,23 +40559,7 @@ def cudaEGLStreamConsumerReleaseFrame(conn, pCudaResource, pStream): :py:obj:`~.cudaEGLStreamConsumerConnect`, :py:obj:`~.cudaEGLStreamConsumerDisconnect`, :py:obj:`~.cudaEGLStreamConsumerAcquireFrame`, :py:obj:`~.cuEGLStreamConsumerReleaseFrame` """ cdef cyruntime.cudaStream_t *cypStream - if pStream is None: - cypStream = NULL - elif isinstance(pStream, (cudaStream_t,driver.CUstream)): - ppStream = pStream.getPtr() - cypStream = ppStream - elif isinstance(pStream, (int)): - cypStream = pStream - else: - raise TypeError("Argument 'pStream' is not instance of type (expected , found " + str(type(pStream))) cdef cyruntime.cudaGraphicsResource_t cypCudaResource - if pCudaResource is None: - ppCudaResource = 0 - elif isinstance(pCudaResource, (cudaGraphicsResource_t,)): - ppCudaResource = int(pCudaResource) - else: - ppCudaResource = int(cudaGraphicsResource_t(pCudaResource)) - cypCudaResource = ppCudaResource cdef cyruntime.cudaEglStreamConnection *cyconn if conn is None: cyconn = NULL @@ -40079,6 +40570,22 @@ def cudaEGLStreamConsumerReleaseFrame(conn, pCudaResource, pStream): cyconn = conn else: raise TypeError("Argument 'conn' is not instance of type (expected , found " + str(type(conn))) + if pCudaResource is None: + ppCudaResource = 0 + elif isinstance(pCudaResource, (cudaGraphicsResource_t,)): + ppCudaResource = int(pCudaResource) + else: + ppCudaResource = int(cudaGraphicsResource_t(pCudaResource)) + cypCudaResource = ppCudaResource + if pStream is None: + cypStream = NULL + elif isinstance(pStream, (cudaStream_t,driver.CUstream)): + ppStream = pStream.getPtr() + cypStream = ppStream + elif isinstance(pStream, (int)): + cypStream = pStream + else: + raise TypeError("Argument 'pStream' is not instance of type (expected , found " + str(type(pStream))) with nogil: err = cyruntime.cudaEGLStreamConsumerReleaseFrame(cyconn, cypCudaResource, cypStream) return (_cudaError_t(err),) @@ -40116,22 +40623,10 @@ def cudaEGLStreamProducerConnect(eglStream, width, height): :py:obj:`~.cudaEGLStreamProducerDisconnect`, :py:obj:`~.cudaEGLStreamProducerPresentFrame`, :py:obj:`~.cudaEGLStreamProducerReturnFrame`, :py:obj:`~.cuEGLStreamProducerConnect` """ cdef cyruntime.EGLint cyheight - if height is None: - pheight = 0 - elif isinstance(height, (EGLint,)): - pheight = int(height) - else: - pheight = int(EGLint(height)) - cyheight = pheight cdef cyruntime.EGLint cywidth - if width is None: - pwidth = 0 - elif isinstance(width, (EGLint,)): - pwidth = int(width) - else: - pwidth = int(EGLint(width)) - cywidth = pwidth cdef cyruntime.EGLStreamKHR cyeglStream + cdef cudaEglStreamConnection conn + conn = cudaEglStreamConnection() if eglStream is None: peglStream = 0 elif isinstance(eglStream, (EGLStreamKHR,)): @@ -40139,7 +40634,20 @@ def cudaEGLStreamProducerConnect(eglStream, width, height): else: peglStream = int(EGLStreamKHR(eglStream)) cyeglStream = peglStream - cdef cudaEglStreamConnection conn = cudaEglStreamConnection() + if width is None: + pwidth = 0 + elif isinstance(width, (EGLint,)): + pwidth = int(width) + else: + pwidth = int(EGLint(width)) + cywidth = pwidth + if height is None: + pheight = 0 + elif isinstance(height, (EGLint,)): + pheight = int(height) + else: + pheight = int(EGLint(height)) + cyheight = pheight with nogil: err = cyruntime.cudaEGLStreamProducerConnect(conn._pvt_ptr, cyeglStream, cywidth, cyheight) if err != cyruntime.cudaSuccess: @@ -40220,15 +40728,6 @@ def cudaEGLStreamProducerPresentFrame(conn, eglframe not None : cudaEglFrame, pS :py:obj:`~.cudaEGLStreamProducerConnect`, :py:obj:`~.cudaEGLStreamProducerDisconnect`, :py:obj:`~.cudaEGLStreamProducerReturnFrame`, :py:obj:`~.cuEGLStreamProducerPresentFrame` """ cdef cyruntime.cudaStream_t *cypStream - if pStream is None: - cypStream = NULL - elif isinstance(pStream, (cudaStream_t,driver.CUstream)): - ppStream = pStream.getPtr() - cypStream = ppStream - elif isinstance(pStream, (int)): - cypStream = pStream - else: - raise TypeError("Argument 'pStream' is not instance of type (expected , found " + str(type(pStream))) cdef cyruntime.cudaEglStreamConnection *cyconn if conn is None: cyconn = NULL @@ -40239,6 +40738,15 @@ def cudaEGLStreamProducerPresentFrame(conn, eglframe not None : cudaEglFrame, pS cyconn = conn else: raise TypeError("Argument 'conn' is not instance of type (expected , found " + str(type(conn))) + if pStream is None: + cypStream = NULL + elif isinstance(pStream, (cudaStream_t,driver.CUstream)): + ppStream = pStream.getPtr() + cypStream = ppStream + elif isinstance(pStream, (int)): + cypStream = pStream + else: + raise TypeError("Argument 'pStream' is not instance of type (expected , found " + str(type(pStream))) with nogil: err = cyruntime.cudaEGLStreamProducerPresentFrame(cyconn, eglframe._pvt_ptr[0], cypStream) return (_cudaError_t(err),) @@ -40274,15 +40782,7 @@ def cudaEGLStreamProducerReturnFrame(conn, eglframe : Optional[cudaEglFrame], pS :py:obj:`~.cudaEGLStreamProducerConnect`, :py:obj:`~.cudaEGLStreamProducerDisconnect`, :py:obj:`~.cudaEGLStreamProducerPresentFrame`, :py:obj:`~.cuEGLStreamProducerReturnFrame` """ cdef cyruntime.cudaStream_t *cypStream - if pStream is None: - cypStream = NULL - elif isinstance(pStream, (cudaStream_t,driver.CUstream)): - ppStream = pStream.getPtr() - cypStream = ppStream - elif isinstance(pStream, (int)): - cypStream = pStream - else: - raise TypeError("Argument 'pStream' is not instance of type (expected , found " + str(type(pStream))) + cdef cyruntime.cudaEglFrame* cyeglframe_ptr cdef cyruntime.cudaEglStreamConnection *cyconn if conn is None: cyconn = NULL @@ -40293,7 +40793,16 @@ def cudaEGLStreamProducerReturnFrame(conn, eglframe : Optional[cudaEglFrame], pS cyconn = conn else: raise TypeError("Argument 'conn' is not instance of type (expected , found " + str(type(conn))) - cdef cyruntime.cudaEglFrame* cyeglframe_ptr = eglframe._pvt_ptr if eglframe is not None else NULL + cyeglframe_ptr = eglframe._pvt_ptr if eglframe is not None else NULL + if pStream is None: + cypStream = NULL + elif isinstance(pStream, (cudaStream_t,driver.CUstream)): + ppStream = pStream.getPtr() + cypStream = ppStream + elif isinstance(pStream, (int)): + cypStream = pStream + else: + raise TypeError("Argument 'pStream' is not instance of type (expected , found " + str(type(pStream))) with nogil: err = cyruntime.cudaEGLStreamProducerReturnFrame(cyconn, cyeglframe_ptr, cypStream) return (_cudaError_t(err),) @@ -40338,6 +40847,8 @@ def cudaGraphicsResourceGetMappedEglFrame(resource, unsigned int index, unsigned Note that in case of multiplanar `*eglFrame`, pitch of only first plane (unsigned int :py:obj:`~.cudaEglPlaneDesc.pitch`) is to be considered by the application. """ cdef cyruntime.cudaGraphicsResource_t cyresource + cdef cudaEglFrame eglFrame + eglFrame = cudaEglFrame() if resource is None: presource = 0 elif isinstance(resource, (cudaGraphicsResource_t,)): @@ -40345,7 +40856,6 @@ def cudaGraphicsResourceGetMappedEglFrame(resource, unsigned int index, unsigned else: presource = int(cudaGraphicsResource_t(resource)) cyresource = presource - cdef cudaEglFrame eglFrame = cudaEglFrame() with nogil: err = cyruntime.cudaGraphicsResourceGetMappedEglFrame(eglFrame._pvt_ptr, cyresource, index, mipLevel) if err != cyruntime.cudaSuccess: @@ -40394,6 +40904,8 @@ def cudaEventCreateFromEGLSync(eglSync, unsigned int flags): :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy` """ cdef cyruntime.EGLSyncKHR cyeglSync + cdef cudaEvent_t phEvent + phEvent = cudaEvent_t() if eglSync is None: peglSync = 0 elif isinstance(eglSync, (EGLSyncKHR,)): @@ -40401,7 +40913,6 @@ def cudaEventCreateFromEGLSync(eglSync, unsigned int flags): else: peglSync = int(EGLSyncKHR(eglSync)) cyeglSync = peglSync - cdef cudaEvent_t phEvent = cudaEvent_t() with nogil: err = cyruntime.cudaEventCreateFromEGLSync(phEvent._pvt_ptr, cyeglSync, flags) if err != cyruntime.cudaSuccess: @@ -40514,20 +41025,23 @@ def cudaGLGetDevices(unsigned int cudaDeviceCount, deviceList not None : cudaGLD This function is not supported on Mac OS X. """ - cdef unsigned int pCudaDeviceCount = 0 + cdef cyruntime.cudaGLDeviceList cydeviceList cdef int* cypCudaDevices = NULL pypCudaDevices = [] - if cudaDeviceCount != 0: - cypCudaDevices = calloc(cudaDeviceCount, sizeof(int)) - if cypCudaDevices is NULL: - raise MemoryError('Failed to allocate length x size memory: ' + str(cudaDeviceCount) + 'x' + str(sizeof(int))) - cdef cyruntime.cudaGLDeviceList cydeviceList = int(deviceList) - with nogil: - err = cyruntime.cudaGLGetDevices(&pCudaDeviceCount, cypCudaDevices, cudaDeviceCount, cydeviceList) - if cudaError_t(err) == cudaError_t(0): - pypCudaDevices = [cypCudaDevices[idx] for idx in range(cudaDeviceCount)] - if cypCudaDevices is not NULL: - free(cypCudaDevices) + cdef unsigned int pCudaDeviceCount = 0 + try: + if cudaDeviceCount != 0: + cypCudaDevices = calloc(cudaDeviceCount, sizeof(int)) + if cypCudaDevices is NULL: + raise MemoryError('Failed to allocate length x size memory: ' + str(cudaDeviceCount) + 'x' + str(sizeof(int))) + cydeviceList = int(deviceList) + with nogil: + err = cyruntime.cudaGLGetDevices(&pCudaDeviceCount, cypCudaDevices, cudaDeviceCount, cydeviceList) + finally: + if cudaError_t(err) == cudaError_t(0): + pypCudaDevices = [cypCudaDevices[idx] for idx in range(cudaDeviceCount)] + if cypCudaDevices is not NULL: + free(cypCudaDevices) if err != cyruntime.cudaSuccess: return (_cudaError_t(err), None, None) return (_cudaError_t_SUCCESS, pCudaDeviceCount, pypCudaDevices) @@ -40610,14 +41124,9 @@ def cudaGraphicsGLRegisterImage(image, target, unsigned int flags): :py:obj:`~.cudaGraphicsUnregisterResource`, :py:obj:`~.cudaGraphicsMapResources`, :py:obj:`~.cudaGraphicsSubResourceGetMappedArray`, :py:obj:`~.cuGraphicsGLRegisterImage` """ cdef cyruntime.GLenum cytarget - if target is None: - ptarget = 0 - elif isinstance(target, (GLenum,)): - ptarget = int(target) - else: - ptarget = int(GLenum(target)) - cytarget = ptarget cdef cyruntime.GLuint cyimage + cdef cudaGraphicsResource_t resource + resource = cudaGraphicsResource_t() if image is None: pimage = 0 elif isinstance(image, (GLuint,)): @@ -40625,7 +41134,13 @@ def cudaGraphicsGLRegisterImage(image, target, unsigned int flags): else: pimage = int(GLuint(image)) cyimage = pimage - cdef cudaGraphicsResource_t resource = cudaGraphicsResource_t() + if target is None: + ptarget = 0 + elif isinstance(target, (GLenum,)): + ptarget = int(target) + else: + ptarget = int(GLenum(target)) + cytarget = ptarget with nogil: err = cyruntime.cudaGraphicsGLRegisterImage(resource._pvt_ptr, cyimage, cytarget, flags) if err != cyruntime.cudaSuccess: @@ -40675,6 +41190,8 @@ def cudaGraphicsGLRegisterBuffer(buffer, unsigned int flags): :py:obj:`~.cudaGraphicsUnregisterResource`, :py:obj:`~.cudaGraphicsMapResources`, :py:obj:`~.cudaGraphicsResourceGetMappedPointer`, :py:obj:`~.cuGraphicsGLRegisterBuffer` """ cdef cyruntime.GLuint cybuffer + cdef cudaGraphicsResource_t resource + resource = cudaGraphicsResource_t() if buffer is None: pbuffer = 0 elif isinstance(buffer, (GLuint,)): @@ -40682,7 +41199,6 @@ def cudaGraphicsGLRegisterBuffer(buffer, unsigned int flags): else: pbuffer = int(GLuint(buffer)) cybuffer = pbuffer - cdef cudaGraphicsResource_t resource = cudaGraphicsResource_t() with nogil: err = cyruntime.cudaGraphicsGLRegisterBuffer(resource._pvt_ptr, cybuffer, flags) if err != cyruntime.cudaSuccess: @@ -40718,6 +41234,15 @@ def cudaVDPAUGetDevice(vdpDevice, vdpGetProcAddress): :py:obj:`~.cudaVDPAUSetVDPAUDevice`, :py:obj:`~.cuVDPAUGetDevice` """ cdef cyruntime.VdpGetProcAddress *cyvdpGetProcAddress + cdef cyruntime.VdpDevice cyvdpDevice + cdef int device = 0 + if vdpDevice is None: + pvdpDevice = 0 + elif isinstance(vdpDevice, (VdpDevice,)): + pvdpDevice = int(vdpDevice) + else: + pvdpDevice = int(VdpDevice(vdpDevice)) + cyvdpDevice = pvdpDevice if vdpGetProcAddress is None: cyvdpGetProcAddress = NULL elif isinstance(vdpGetProcAddress, (VdpGetProcAddress,)): @@ -40727,15 +41252,6 @@ def cudaVDPAUGetDevice(vdpDevice, vdpGetProcAddress): cyvdpGetProcAddress = vdpGetProcAddress else: raise TypeError("Argument 'vdpGetProcAddress' is not instance of type (expected , found " + str(type(vdpGetProcAddress))) - cdef cyruntime.VdpDevice cyvdpDevice - if vdpDevice is None: - pvdpDevice = 0 - elif isinstance(vdpDevice, (VdpDevice,)): - pvdpDevice = int(vdpDevice) - else: - pvdpDevice = int(VdpDevice(vdpDevice)) - cyvdpDevice = pvdpDevice - cdef int device = 0 with nogil: err = cyruntime.cudaVDPAUGetDevice(&device, cyvdpDevice, cyvdpGetProcAddress) if err != cyruntime.cudaSuccess: @@ -40780,6 +41296,14 @@ def cudaVDPAUSetVDPAUDevice(int device, vdpDevice, vdpGetProcAddress): :py:obj:`~.cudaGraphicsVDPAURegisterVideoSurface`, :py:obj:`~.cudaGraphicsVDPAURegisterOutputSurface`, :py:obj:`~.cudaDeviceReset` """ cdef cyruntime.VdpGetProcAddress *cyvdpGetProcAddress + cdef cyruntime.VdpDevice cyvdpDevice + if vdpDevice is None: + pvdpDevice = 0 + elif isinstance(vdpDevice, (VdpDevice,)): + pvdpDevice = int(vdpDevice) + else: + pvdpDevice = int(VdpDevice(vdpDevice)) + cyvdpDevice = pvdpDevice if vdpGetProcAddress is None: cyvdpGetProcAddress = NULL elif isinstance(vdpGetProcAddress, (VdpGetProcAddress,)): @@ -40789,14 +41313,6 @@ def cudaVDPAUSetVDPAUDevice(int device, vdpDevice, vdpGetProcAddress): cyvdpGetProcAddress = vdpGetProcAddress else: raise TypeError("Argument 'vdpGetProcAddress' is not instance of type (expected , found " + str(type(vdpGetProcAddress))) - cdef cyruntime.VdpDevice cyvdpDevice - if vdpDevice is None: - pvdpDevice = 0 - elif isinstance(vdpDevice, (VdpDevice,)): - pvdpDevice = int(vdpDevice) - else: - pvdpDevice = int(VdpDevice(vdpDevice)) - cyvdpDevice = pvdpDevice with nogil: err = cyruntime.cudaVDPAUSetVDPAUDevice(device, cyvdpDevice, cyvdpGetProcAddress) return (_cudaError_t(err),) @@ -40844,6 +41360,8 @@ def cudaGraphicsVDPAURegisterVideoSurface(vdpSurface, unsigned int flags): :py:obj:`~.cudaVDPAUSetVDPAUDevice`, :py:obj:`~.cudaGraphicsUnregisterResource`, :py:obj:`~.cudaGraphicsSubResourceGetMappedArray`, :py:obj:`~.cuGraphicsVDPAURegisterVideoSurface` """ cdef cyruntime.VdpVideoSurface cyvdpSurface + cdef cudaGraphicsResource_t resource + resource = cudaGraphicsResource_t() if vdpSurface is None: pvdpSurface = 0 elif isinstance(vdpSurface, (VdpVideoSurface,)): @@ -40851,7 +41369,6 @@ def cudaGraphicsVDPAURegisterVideoSurface(vdpSurface, unsigned int flags): else: pvdpSurface = int(VdpVideoSurface(vdpSurface)) cyvdpSurface = pvdpSurface - cdef cudaGraphicsResource_t resource = cudaGraphicsResource_t() with nogil: err = cyruntime.cudaGraphicsVDPAURegisterVideoSurface(resource._pvt_ptr, cyvdpSurface, flags) if err != cyruntime.cudaSuccess: @@ -40901,6 +41418,8 @@ def cudaGraphicsVDPAURegisterOutputSurface(vdpSurface, unsigned int flags): :py:obj:`~.cudaVDPAUSetVDPAUDevice`, :py:obj:`~.cudaGraphicsUnregisterResource`, :py:obj:`~.cudaGraphicsSubResourceGetMappedArray`, :py:obj:`~.cuGraphicsVDPAURegisterOutputSurface` """ cdef cyruntime.VdpOutputSurface cyvdpSurface + cdef cudaGraphicsResource_t resource + resource = cudaGraphicsResource_t() if vdpSurface is None: pvdpSurface = 0 elif isinstance(vdpSurface, (VdpOutputSurface,)): @@ -40908,7 +41427,6 @@ def cudaGraphicsVDPAURegisterOutputSurface(vdpSurface, unsigned int flags): else: pvdpSurface = int(VdpOutputSurface(vdpSurface)) cyvdpSurface = pvdpSurface - cdef cudaGraphicsResource_t resource = cudaGraphicsResource_t() with nogil: err = cyruntime.cudaGraphicsVDPAURegisterOutputSurface(resource._pvt_ptr, cyvdpSurface, flags) if err != cyruntime.cudaSuccess: From d663f52ff580669c4eccf2f0c69ae36c53ac2b30 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Fri, 20 Feb 2026 14:56:34 -0500 Subject: [PATCH 2/2] Handle None early --- cuda_bindings/cuda/bindings/driver.pyx.in | 144 +++++++++++---------- cuda_bindings/cuda/bindings/nvrtc.pyx.in | 6 +- cuda_bindings/cuda/bindings/runtime.pyx.in | 107 ++++++++------- 3 files changed, 138 insertions(+), 119 deletions(-) diff --git a/cuda_bindings/cuda/bindings/driver.pyx.in b/cuda_bindings/cuda/bindings/driver.pyx.in index 4384b24684..61e8330036 100644 --- a/cuda_bindings/cuda/bindings/driver.pyx.in +++ b/cuda_bindings/cuda/bindings/driver.pyx.in @@ -26102,7 +26102,9 @@ def cuDeviceGetHostAtomicCapabilities(operations : Optional[tuple[CUatomicOperat :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetP2PAtomicCapabilities`, :py:obj:`~.cudaDeviceGeHostAtomicCapabilities` """ cdef cydriver.CUdevice cydev + cdef vector[cydriver.CUatomicOperation] cyoperations + operations = [] if operations is None else operations cdef unsigned int* cycapabilities = NULL pycapabilities = [] try: @@ -26110,7 +26112,6 @@ def cuDeviceGetHostAtomicCapabilities(operations : Optional[tuple[CUatomicOperat cycapabilities = calloc(count, sizeof(unsigned int)) if cycapabilities is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(count) + 'x' + str(sizeof(unsigned int))) - operations = [] if operations is None else operations if not all(isinstance(_x, (CUatomicOperation)) for _x in operations): raise TypeError("Argument 'operations' is not instance of type (expected tuple[cydriver.CUatomicOperation] or list[cydriver.CUatomicOperation]") cyoperations = operations @@ -28405,7 +28406,10 @@ def cuModuleLoadDataEx(image, unsigned int numOptions, options : Optional[tuple[ """ cdef _InputVoidPtrPtrHelper voidStarHelperoptionValues cdef void ** cyoptionValues_ptr + optionValues = [] if optionValues is None else optionValues + cdef vector[cydriver.CUjit_option] cyoptions + options = [] if options is None else options cdef _HelperInputVoidPtrStruct cyimageHelper cdef void* cyimage cdef CUmodule module @@ -28414,11 +28418,9 @@ def cuModuleLoadDataEx(image, unsigned int numOptions, options : Optional[tuple[ cyimage = _helper_input_void_ptr(image, &cyimageHelper) if numOptions > len(options): raise RuntimeError("List is too small: " + str(len(options)) + " < " + str(numOptions)) if numOptions > len(optionValues): raise RuntimeError("List is too small: " + str(len(optionValues)) + " < " + str(numOptions)) - options = [] if options is None else options if not all(isinstance(_x, (CUjit_option)) for _x in options): raise TypeError("Argument 'options' is not instance of type (expected tuple[cydriver.CUjit_option] or list[cydriver.CUjit_option]") cyoptions = options - optionValues = [] if optionValues is None else optionValues pylist = [_HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(options, optionValues)] voidStarHelperoptionValues = _InputVoidPtrPtrHelper(pylist) cyoptionValues_ptr = voidStarHelperoptionValues.cptr @@ -28794,15 +28796,16 @@ def cuLinkCreate(unsigned int numOptions, options : Optional[tuple[CUjit_option] cdef CUlinkState stateOut cdef _InputVoidPtrPtrHelper voidStarHelperoptionValues cdef void ** cyoptionValues_ptr + optionValues = [] if optionValues is None else optionValues + cdef vector[cydriver.CUjit_option] cyoptions + options = [] if options is None else options try: if numOptions > len(options): raise RuntimeError("List is too small: " + str(len(options)) + " < " + str(numOptions)) if numOptions > len(optionValues): raise RuntimeError("List is too small: " + str(len(optionValues)) + " < " + str(numOptions)) - options = [] if options is None else options if not all(isinstance(_x, (CUjit_option)) for _x in options): raise TypeError("Argument 'options' is not instance of type (expected tuple[cydriver.CUjit_option] or list[cydriver.CUjit_option]") cyoptions = options - optionValues = [] if optionValues is None else optionValues pylist = [_HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(options, optionValues)] voidStarHelperoptionValues = _InputVoidPtrPtrHelper(pylist) cyoptionValues_ptr = voidStarHelperoptionValues.cptr @@ -28868,7 +28871,10 @@ def cuLinkAddData(state, typename not None : CUjitInputType, data, size_t size, """ cdef _InputVoidPtrPtrHelper voidStarHelperoptionValues cdef void ** cyoptionValues_ptr + optionValues = [] if optionValues is None else optionValues + cdef vector[cydriver.CUjit_option] cyoptions + options = [] if options is None else options cdef _HelperInputVoidPtrStruct cydataHelper cdef void* cydata cdef cydriver.CUjitInputType cytypename @@ -28885,11 +28891,9 @@ def cuLinkAddData(state, typename not None : CUjitInputType, data, size_t size, cydata = _helper_input_void_ptr(data, &cydataHelper) if numOptions > len(options): raise RuntimeError("List is too small: " + str(len(options)) + " < " + str(numOptions)) if numOptions > len(optionValues): raise RuntimeError("List is too small: " + str(len(optionValues)) + " < " + str(numOptions)) - options = [] if options is None else options if not all(isinstance(_x, (CUjit_option)) for _x in options): raise TypeError("Argument 'options' is not instance of type (expected tuple[cydriver.CUjit_option] or list[cydriver.CUjit_option]") cyoptions = options - optionValues = [] if optionValues is None else optionValues pylist = [_HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(options, optionValues)] voidStarHelperoptionValues = _InputVoidPtrPtrHelper(pylist) cyoptionValues_ptr = voidStarHelperoptionValues.cptr @@ -28948,7 +28952,10 @@ def cuLinkAddFile(state, typename not None : CUjitInputType, char* path, unsigne """ cdef _InputVoidPtrPtrHelper voidStarHelperoptionValues cdef void ** cyoptionValues_ptr + optionValues = [] if optionValues is None else optionValues + cdef vector[cydriver.CUjit_option] cyoptions + options = [] if options is None else options cdef cydriver.CUjitInputType cytypename cdef cydriver.CUlinkState cystate if state is None: @@ -28961,11 +28968,9 @@ def cuLinkAddFile(state, typename not None : CUjitInputType, char* path, unsigne cytypename = int(typename) if numOptions > len(options): raise RuntimeError("List is too small: " + str(len(options)) + " < " + str(numOptions)) if numOptions > len(optionValues): raise RuntimeError("List is too small: " + str(len(optionValues)) + " < " + str(numOptions)) - options = [] if options is None else options if not all(isinstance(_x, (CUjit_option)) for _x in options): raise TypeError("Argument 'options' is not instance of type (expected tuple[cydriver.CUjit_option] or list[cydriver.CUjit_option]") cyoptions = options - optionValues = [] if optionValues is None else optionValues pylist = [_HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(options, optionValues)] voidStarHelperoptionValues = _InputVoidPtrPtrHelper(pylist) cyoptionValues_ptr = voidStarHelperoptionValues.cptr @@ -29223,31 +29228,33 @@ def cuLibraryLoadData(code, jitOptions : Optional[tuple[CUjit_option] | list[CUj """ cdef _InputVoidPtrPtrHelper voidStarHelperlibraryOptionValues cdef void** cylibraryOptionValues_ptr + libraryOptionValues = [] if libraryOptionValues is None else libraryOptionValues + cdef vector[cydriver.CUlibraryOption] cylibraryOptions + libraryOptions = [] if libraryOptions is None else libraryOptions cdef _InputVoidPtrPtrHelper voidStarHelperjitOptionsValues cdef void** cyjitOptionsValues_ptr + jitOptionsValues = [] if jitOptionsValues is None else jitOptionsValues + cdef vector[cydriver.CUjit_option] cyjitOptions + jitOptions = [] if jitOptions is None else jitOptions cdef _HelperInputVoidPtrStruct cycodeHelper cdef void* cycode cdef CUlibrary library try: library = CUlibrary() cycode = _helper_input_void_ptr(code, &cycodeHelper) - jitOptions = [] if jitOptions is None else jitOptions if not all(isinstance(_x, (CUjit_option)) for _x in jitOptions): raise TypeError("Argument 'jitOptions' is not instance of type (expected tuple[cydriver.CUjit_option] or list[cydriver.CUjit_option]") cyjitOptions = jitOptions - jitOptionsValues = [] if jitOptionsValues is None else jitOptionsValues pylist = [_HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(jitOptions, jitOptionsValues)] voidStarHelperjitOptionsValues = _InputVoidPtrPtrHelper(pylist) cyjitOptionsValues_ptr = voidStarHelperjitOptionsValues.cptr if numJitOptions > len(jitOptions): raise RuntimeError("List is too small: " + str(len(jitOptions)) + " < " + str(numJitOptions)) if numJitOptions > len(jitOptionsValues): raise RuntimeError("List is too small: " + str(len(jitOptionsValues)) + " < " + str(numJitOptions)) - libraryOptions = [] if libraryOptions is None else libraryOptions if not all(isinstance(_x, (CUlibraryOption)) for _x in libraryOptions): raise TypeError("Argument 'libraryOptions' is not instance of type (expected tuple[cydriver.CUlibraryOption] or list[cydriver.CUlibraryOption]") cylibraryOptions = libraryOptions - libraryOptionValues = [] if libraryOptionValues is None else libraryOptionValues pylist = [_HelperCUlibraryOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(libraryOptions, libraryOptionValues)] voidStarHelperlibraryOptionValues = _InputVoidPtrPtrHelper(pylist) cylibraryOptionValues_ptr = voidStarHelperlibraryOptionValues.cptr @@ -29333,27 +29340,29 @@ def cuLibraryLoadFromFile(char* fileName, jitOptions : Optional[tuple[CUjit_opti """ cdef _InputVoidPtrPtrHelper voidStarHelperlibraryOptionValues cdef void** cylibraryOptionValues_ptr + libraryOptionValues = [] if libraryOptionValues is None else libraryOptionValues + cdef vector[cydriver.CUlibraryOption] cylibraryOptions + libraryOptions = [] if libraryOptions is None else libraryOptions cdef _InputVoidPtrPtrHelper voidStarHelperjitOptionsValues cdef void** cyjitOptionsValues_ptr + jitOptionsValues = [] if jitOptionsValues is None else jitOptionsValues + cdef vector[cydriver.CUjit_option] cyjitOptions + jitOptions = [] if jitOptions is None else jitOptions cdef CUlibrary library library = CUlibrary() - jitOptions = [] if jitOptions is None else jitOptions if not all(isinstance(_x, (CUjit_option)) for _x in jitOptions): raise TypeError("Argument 'jitOptions' is not instance of type (expected tuple[cydriver.CUjit_option] or list[cydriver.CUjit_option]") cyjitOptions = jitOptions - jitOptionsValues = [] if jitOptionsValues is None else jitOptionsValues pylist = [_HelperCUjit_option(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(jitOptions, jitOptionsValues)] voidStarHelperjitOptionsValues = _InputVoidPtrPtrHelper(pylist) cyjitOptionsValues_ptr = voidStarHelperjitOptionsValues.cptr if numJitOptions > len(jitOptions): raise RuntimeError("List is too small: " + str(len(jitOptions)) + " < " + str(numJitOptions)) if numJitOptions > len(jitOptionsValues): raise RuntimeError("List is too small: " + str(len(jitOptionsValues)) + " < " + str(numJitOptions)) - libraryOptions = [] if libraryOptions is None else libraryOptions if not all(isinstance(_x, (CUlibraryOption)) for _x in libraryOptions): raise TypeError("Argument 'libraryOptions' is not instance of type (expected tuple[cydriver.CUlibraryOption] or list[cydriver.CUlibraryOption]") cylibraryOptions = libraryOptions - libraryOptionValues = [] if libraryOptionValues is None else libraryOptionValues pylist = [_HelperCUlibraryOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(libraryOptions, libraryOptionValues)] voidStarHelperlibraryOptionValues = _InputVoidPtrPtrHelper(pylist) cylibraryOptionValues_ptr = voidStarHelperlibraryOptionValues.cptr @@ -33414,11 +33423,13 @@ def cuMemcpyBatchAsync(dsts : Optional[tuple[CUdeviceptr] | list[CUdeviceptr]], cdef cydriver.CUstream cyhStream cdef vector[size_t] cyattrsIdxs cdef cydriver.CUmemcpyAttributes* cyattrs = NULL + attrs = [] if attrs is None else attrs cdef vector[size_t] cysizes cdef cydriver.CUdeviceptr* cysrcs = NULL + srcs = [] if srcs is None else srcs cdef cydriver.CUdeviceptr* cydsts = NULL + dsts = [] if dsts is None else dsts try: - dsts = [] if dsts is None else dsts if not all(isinstance(_x, (CUdeviceptr,)) for _x in dsts): raise TypeError("Argument 'dsts' is not instance of type (expected tuple[cydriver.CUdeviceptr,] or list[cydriver.CUdeviceptr,]") if len(dsts) > 1: @@ -33430,7 +33441,6 @@ def cuMemcpyBatchAsync(dsts : Optional[tuple[CUdeviceptr] | list[CUdeviceptr]], cydsts[idx] = (dsts[idx])._pvt_ptr[0] elif len(dsts) == 1: cydsts = (dsts[0])._pvt_ptr - srcs = [] if srcs is None else srcs if not all(isinstance(_x, (CUdeviceptr,)) for _x in srcs): raise TypeError("Argument 'srcs' is not instance of type (expected tuple[cydriver.CUdeviceptr,] or list[cydriver.CUdeviceptr,]") if len(srcs) > 1: @@ -33448,7 +33458,6 @@ def cuMemcpyBatchAsync(dsts : Optional[tuple[CUdeviceptr] | list[CUdeviceptr]], if count > len(dsts): raise RuntimeError("List is too small: " + str(len(dsts)) + " < " + str(count)) if count > len(srcs): raise RuntimeError("List is too small: " + str(len(srcs)) + " < " + str(count)) if count > len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count)) - attrs = [] if attrs is None else attrs if not all(isinstance(_x, (CUmemcpyAttributes,)) for _x in attrs): raise TypeError("Argument 'attrs' is not instance of type (expected tuple[cydriver.CUmemcpyAttributes,] or list[cydriver.CUmemcpyAttributes,]") if len(attrs) > 1: @@ -33588,9 +33597,9 @@ def cuMemcpy3DBatchAsync(size_t numOps, opList : Optional[tuple[CUDA_MEMCPY3D_BA """ cdef cydriver.CUstream cyhStream cdef cydriver.CUDA_MEMCPY3D_BATCH_OP* cyopList = NULL + opList = [] if opList is None else opList try: if numOps > len(opList): raise RuntimeError("List is too small: " + str(len(opList)) + " < " + str(numOps)) - opList = [] if opList is None else opList if not all(isinstance(_x, (CUDA_MEMCPY3D_BATCH_OP,)) for _x in opList): raise TypeError("Argument 'opList' is not instance of type (expected tuple[cydriver.CUDA_MEMCPY3D_BATCH_OP,] or list[cydriver.CUDA_MEMCPY3D_BATCH_OP,]") if len(opList) > 1: @@ -35668,8 +35677,8 @@ def cuMemMapArrayAsync(mapInfoList : Optional[tuple[CUarrayMapInfo] | list[CUarr """ cdef cydriver.CUstream cyhStream cdef cydriver.CUarrayMapInfo* cymapInfoList = NULL + mapInfoList = [] if mapInfoList is None else mapInfoList try: - mapInfoList = [] if mapInfoList is None else mapInfoList if not all(isinstance(_x, (CUarrayMapInfo,)) for _x in mapInfoList): raise TypeError("Argument 'mapInfoList' is not instance of type (expected tuple[cydriver.CUarrayMapInfo,] or list[cydriver.CUarrayMapInfo,]") if len(mapInfoList) > 1: @@ -35790,6 +35799,7 @@ def cuMemSetAccess(ptr, size_t size, desc : Optional[tuple[CUmemAccessDesc] | li :py:obj:`~.cuMemSetAccess`, :py:obj:`~.cuMemCreate`, :py:obj:`~.py`:obj:`~.cuMemMap` """ cdef cydriver.CUmemAccessDesc* cydesc = NULL + desc = [] if desc is None else desc cdef cydriver.CUdeviceptr cyptr try: if ptr is None: @@ -35799,7 +35809,6 @@ def cuMemSetAccess(ptr, size_t size, desc : Optional[tuple[CUmemAccessDesc] | li else: pptr = int(CUdeviceptr(ptr)) cyptr = pptr - desc = [] if desc is None else desc if not all(isinstance(_x, (CUmemAccessDesc,)) for _x in desc): raise TypeError("Argument 'desc' is not instance of type (expected tuple[cydriver.CUmemAccessDesc,] or list[cydriver.CUmemAccessDesc,]") if len(desc) > 1: @@ -36451,6 +36460,7 @@ def cuMemPoolSetAccess(pool, map : Optional[tuple[CUmemAccessDesc] | list[CUmemA :py:obj:`~.cuMemAllocAsync`, :py:obj:`~.cuMemFreeAsync`, :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cuDeviceGetMemPool`, :py:obj:`~.cuMemPoolCreate` """ cdef cydriver.CUmemAccessDesc* cymap = NULL + map = [] if map is None else map cdef cydriver.CUmemoryPool cypool try: if pool is None: @@ -36460,7 +36470,6 @@ def cuMemPoolSetAccess(pool, map : Optional[tuple[CUmemAccessDesc] | list[CUmemA else: ppool = int(CUmemoryPool(pool)) cypool = ppool - map = [] if map is None else map if not all(isinstance(_x, (CUmemAccessDesc,)) for _x in map): raise TypeError("Argument 'map' is not instance of type (expected tuple[cydriver.CUmemAccessDesc,] or list[cydriver.CUmemAccessDesc,]") if len(map) > 1: @@ -38316,10 +38325,11 @@ def cuMemPrefetchBatchAsync(dptrs : Optional[tuple[CUdeviceptr] | list[CUdevicep cdef cydriver.CUstream cyhStream cdef vector[size_t] cyprefetchLocIdxs cdef cydriver.CUmemLocation* cyprefetchLocs = NULL + prefetchLocs = [] if prefetchLocs is None else prefetchLocs cdef vector[size_t] cysizes cdef cydriver.CUdeviceptr* cydptrs = NULL + dptrs = [] if dptrs is None else dptrs try: - dptrs = [] if dptrs is None else dptrs if not all(isinstance(_x, (CUdeviceptr,)) for _x in dptrs): raise TypeError("Argument 'dptrs' is not instance of type (expected tuple[cydriver.CUdeviceptr,] or list[cydriver.CUdeviceptr,]") if len(dptrs) > 1: @@ -38336,7 +38346,6 @@ def cuMemPrefetchBatchAsync(dptrs : Optional[tuple[CUdeviceptr] | list[CUdevicep cysizes = sizes if count > len(dptrs): raise RuntimeError("List is too small: " + str(len(dptrs)) + " < " + str(count)) if count > len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count)) - prefetchLocs = [] if prefetchLocs is None else prefetchLocs if not all(isinstance(_x, (CUmemLocation,)) for _x in prefetchLocs): raise TypeError("Argument 'prefetchLocs' is not instance of type (expected tuple[cydriver.CUmemLocation,] or list[cydriver.CUmemLocation,]") if len(prefetchLocs) > 1: @@ -38424,8 +38433,8 @@ def cuMemDiscardBatchAsync(dptrs : Optional[tuple[CUdeviceptr] | list[CUdevicept cdef cydriver.CUstream cyhStream cdef vector[size_t] cysizes cdef cydriver.CUdeviceptr* cydptrs = NULL + dptrs = [] if dptrs is None else dptrs try: - dptrs = [] if dptrs is None else dptrs if not all(isinstance(_x, (CUdeviceptr,)) for _x in dptrs): raise TypeError("Argument 'dptrs' is not instance of type (expected tuple[cydriver.CUdeviceptr,] or list[cydriver.CUdeviceptr,]") if len(dptrs) > 1: @@ -38536,10 +38545,11 @@ def cuMemDiscardAndPrefetchBatchAsync(dptrs : Optional[tuple[CUdeviceptr] | list cdef cydriver.CUstream cyhStream cdef vector[size_t] cyprefetchLocIdxs cdef cydriver.CUmemLocation* cyprefetchLocs = NULL + prefetchLocs = [] if prefetchLocs is None else prefetchLocs cdef vector[size_t] cysizes cdef cydriver.CUdeviceptr* cydptrs = NULL + dptrs = [] if dptrs is None else dptrs try: - dptrs = [] if dptrs is None else dptrs if not all(isinstance(_x, (CUdeviceptr,)) for _x in dptrs): raise TypeError("Argument 'dptrs' is not instance of type (expected tuple[cydriver.CUdeviceptr,] or list[cydriver.CUdeviceptr,]") if len(dptrs) > 1: @@ -38556,7 +38566,6 @@ def cuMemDiscardAndPrefetchBatchAsync(dptrs : Optional[tuple[CUdeviceptr] | list cysizes = sizes if count > len(dptrs): raise RuntimeError("List is too small: " + str(len(dptrs)) + " < " + str(count)) if count > len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count)) - prefetchLocs = [] if prefetchLocs is None else prefetchLocs if not all(isinstance(_x, (CUmemLocation,)) for _x in prefetchLocs): raise TypeError("Argument 'prefetchLocs' is not instance of type (expected tuple[cydriver.CUmemLocation,] or list[cydriver.CUmemLocation,]") if len(prefetchLocs) > 1: @@ -38811,7 +38820,9 @@ def cuMemRangeGetAttributes(dataSizes : tuple[int] | list[int], attributes : Opt :py:obj:`~.cuMemRangeGetAttribute`, :py:obj:`~.cuMemAdvise`, :py:obj:`~.cuMemPrefetchAsync`, :py:obj:`~.cudaMemRangeGetAttributes` """ cdef cydriver.CUdeviceptr cydevPtr + cdef vector[cydriver.CUmem_range_attribute] cyattributes + attributes = [] if attributes is None else attributes cdef vector[size_t] cydataSizes cdef _InputVoidPtrPtrHelper voidStarHelperdata cdef void** cyvoidStarHelper_ptr @@ -38821,7 +38832,6 @@ def cuMemRangeGetAttributes(dataSizes : tuple[int] | list[int], attributes : Opt if not all(isinstance(_x, (int)) for _x in dataSizes): raise TypeError("Argument 'dataSizes' is not instance of type (expected tuple[int] or list[int]") cydataSizes = dataSizes - attributes = [] if attributes is None else attributes if not all(isinstance(_x, (CUmem_range_attribute)) for _x in attributes): raise TypeError("Argument 'attributes' is not instance of type (expected tuple[cydriver.CUmem_range_attribute] or list[cydriver.CUmem_range_attribute]") cyattributes = attributes @@ -38974,9 +38984,10 @@ def cuPointerGetAttributes(unsigned int numAttributes, attributes : Optional[tup cdef cydriver.CUdeviceptr cyptr cdef _InputVoidPtrPtrHelper voidStarHelperdata cdef void** cyvoidStarHelper_ptr + cdef vector[cydriver.CUpointer_attribute] cyattributes - if numAttributes > len(attributes): raise RuntimeError("List is too small: " + str(len(attributes)) + " < " + str(numAttributes)) attributes = [] if attributes is None else attributes + if numAttributes > len(attributes): raise RuntimeError("List is too small: " + str(len(attributes)) + " < " + str(numAttributes)) if not all(isinstance(_x, (CUpointer_attribute)) for _x in attributes): raise TypeError("Argument 'attributes' is not instance of type (expected tuple[cydriver.CUpointer_attribute] or list[cydriver.CUpointer_attribute]") cyattributes = attributes @@ -39725,7 +39736,9 @@ def cuStreamBeginCaptureToGraph(hStream, hGraph, dependencies : Optional[tuple[C """ cdef cydriver.CUstreamCaptureMode cymode cdef cydriver.CUgraphEdgeData* cydependencyData = NULL + dependencyData = [] if dependencyData is None else dependencyData cdef cydriver.CUgraphNode* cydependencies = NULL + dependencies = [] if dependencies is None else dependencies cdef cydriver.CUgraph cyhGraph cdef cydriver.CUstream cyhStream try: @@ -39743,7 +39756,6 @@ def cuStreamBeginCaptureToGraph(hStream, hGraph, dependencies : Optional[tuple[C else: phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph - dependencies = [] if dependencies is None else dependencies if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") if len(dependencies) > 1: @@ -39755,7 +39767,6 @@ def cuStreamBeginCaptureToGraph(hStream, hGraph, dependencies : Optional[tuple[C cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] elif len(dependencies) == 1: cydependencies = (dependencies[0])._pvt_ptr - dependencyData = [] if dependencyData is None else dependencyData if not all(isinstance(_x, (CUgraphEdgeData,)) for _x in dependencyData): raise TypeError("Argument 'dependencyData' is not instance of type (expected tuple[cydriver.CUgraphEdgeData,] or list[cydriver.CUgraphEdgeData,]") if len(dependencyData) > 1: @@ -40115,7 +40126,9 @@ def cuStreamUpdateCaptureDependencies(hStream, dependencies : Optional[tuple[CUg :py:obj:`~.cuStreamBeginCapture`, :py:obj:`~.cuStreamGetCaptureInfo` """ cdef cydriver.CUgraphEdgeData* cydependencyData = NULL + dependencyData = [] if dependencyData is None else dependencyData cdef cydriver.CUgraphNode* cydependencies = NULL + dependencies = [] if dependencies is None else dependencies cdef cydriver.CUstream cyhStream try: if hStream is None: @@ -40125,7 +40138,6 @@ def cuStreamUpdateCaptureDependencies(hStream, dependencies : Optional[tuple[CUg else: phStream = int(CUstream(hStream)) cyhStream = phStream - dependencies = [] if dependencies is None else dependencies if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") if len(dependencies) > 1: @@ -40137,7 +40149,6 @@ def cuStreamUpdateCaptureDependencies(hStream, dependencies : Optional[tuple[CUg cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] elif len(dependencies) == 1: cydependencies = (dependencies[0])._pvt_ptr - dependencyData = [] if dependencyData is None else dependencyData if not all(isinstance(_x, (CUgraphEdgeData,)) for _x in dependencyData): raise TypeError("Argument 'dependencyData' is not instance of type (expected tuple[cydriver.CUgraphEdgeData,] or list[cydriver.CUgraphEdgeData,]") if len(dependencyData) > 1: @@ -41535,9 +41546,10 @@ def cuSignalExternalSemaphoresAsync(extSemArray : Optional[tuple[CUexternalSemap """ cdef cydriver.CUstream cystream cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS* cyparamsArray = NULL + paramsArray = [] if paramsArray is None else paramsArray cdef cydriver.CUexternalSemaphore* cyextSemArray = NULL + extSemArray = [] if extSemArray is None else extSemArray try: - extSemArray = [] if extSemArray is None else extSemArray if not all(isinstance(_x, (CUexternalSemaphore,)) for _x in extSemArray): raise TypeError("Argument 'extSemArray' is not instance of type (expected tuple[cydriver.CUexternalSemaphore,] or list[cydriver.CUexternalSemaphore,]") if len(extSemArray) > 1: @@ -41549,7 +41561,6 @@ def cuSignalExternalSemaphoresAsync(extSemArray : Optional[tuple[CUexternalSemap cyextSemArray[idx] = (extSemArray[idx])._pvt_ptr[0] elif len(extSemArray) == 1: cyextSemArray = (extSemArray[0])._pvt_ptr - paramsArray = [] if paramsArray is None else paramsArray if not all(isinstance(_x, (CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS,)) for _x in paramsArray): raise TypeError("Argument 'paramsArray' is not instance of type (expected tuple[cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS,] or list[cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS,]") if len(paramsArray) > 1: @@ -41667,9 +41678,10 @@ def cuWaitExternalSemaphoresAsync(extSemArray : Optional[tuple[CUexternalSemapho """ cdef cydriver.CUstream cystream cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS* cyparamsArray = NULL + paramsArray = [] if paramsArray is None else paramsArray cdef cydriver.CUexternalSemaphore* cyextSemArray = NULL + extSemArray = [] if extSemArray is None else extSemArray try: - extSemArray = [] if extSemArray is None else extSemArray if not all(isinstance(_x, (CUexternalSemaphore,)) for _x in extSemArray): raise TypeError("Argument 'extSemArray' is not instance of type (expected tuple[cydriver.CUexternalSemaphore,] or list[cydriver.CUexternalSemaphore,]") if len(extSemArray) > 1: @@ -41681,7 +41693,6 @@ def cuWaitExternalSemaphoresAsync(extSemArray : Optional[tuple[CUexternalSemapho cyextSemArray[idx] = (extSemArray[idx])._pvt_ptr[0] elif len(extSemArray) == 1: cyextSemArray = (extSemArray[0])._pvt_ptr - paramsArray = [] if paramsArray is None else paramsArray if not all(isinstance(_x, (CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS,)) for _x in paramsArray): raise TypeError("Argument 'paramsArray' is not instance of type (expected tuple[cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS,] or list[cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS,]") if len(paramsArray) > 1: @@ -42066,6 +42077,7 @@ def cuStreamBatchMemOp(stream, unsigned int count, paramArray : Optional[tuple[C Warning: Improper use of this API may deadlock the application. Synchronization ordering established through this API is not visible to CUDA. CUDA tasks that are (even indirectly) ordered by this API should also have that order expressed with CUDA-visible dependencies such as events. This ensures that the scheduler does not serialize them in an improper order. """ cdef cydriver.CUstreamBatchMemOpParams* cyparamArray = NULL + paramArray = [] if paramArray is None else paramArray cdef cydriver.CUstream cystream try: if stream is None: @@ -42076,7 +42088,6 @@ def cuStreamBatchMemOp(stream, unsigned int count, paramArray : Optional[tuple[C pstream = int(CUstream(stream)) cystream = pstream if count > len(paramArray): raise RuntimeError("List is too small: " + str(len(paramArray)) + " < " + str(count)) - paramArray = [] if paramArray is None else paramArray if not all(isinstance(_x, (CUstreamBatchMemOpParams,)) for _x in paramArray): raise TypeError("Argument 'paramArray' is not instance of type (expected tuple[cydriver.CUstreamBatchMemOpParams,] or list[cydriver.CUstreamBatchMemOpParams,]") if len(paramArray) > 1: @@ -43258,8 +43269,8 @@ def cuLaunchCooperativeKernelMultiDevice(launchParamsList : Optional[tuple[CUDA_ :py:obj:`~.cuCtxGetCacheConfig`, :py:obj:`~.cuCtxSetCacheConfig`, :py:obj:`~.cuFuncSetCacheConfig`, :py:obj:`~.cuFuncGetAttribute`, :py:obj:`~.cuLaunchCooperativeKernel`, :py:obj:`~.cudaLaunchCooperativeKernelMultiDevice` """ cdef cydriver.CUDA_LAUNCH_PARAMS* cylaunchParamsList = NULL + launchParamsList = [] if launchParamsList is None else launchParamsList try: - launchParamsList = [] if launchParamsList is None else launchParamsList if not all(isinstance(_x, (CUDA_LAUNCH_PARAMS,)) for _x in launchParamsList): raise TypeError("Argument 'launchParamsList' is not instance of type (expected tuple[cydriver.CUDA_LAUNCH_PARAMS,] or list[cydriver.CUDA_LAUNCH_PARAMS,]") if len(launchParamsList) > 1: @@ -44079,6 +44090,7 @@ def cuGraphAddKernelNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | li """ cdef cydriver.CUDA_KERNEL_NODE_PARAMS* cynodeParams_ptr cdef cydriver.CUgraphNode* cydependencies = NULL + dependencies = [] if dependencies is None else dependencies cdef cydriver.CUgraph cyhGraph cdef CUgraphNode phGraphNode try: @@ -44090,7 +44102,6 @@ def cuGraphAddKernelNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | li else: phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph - dependencies = [] if dependencies is None else dependencies if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") if len(dependencies) > 1: @@ -44256,6 +44267,7 @@ def cuGraphAddMemcpyNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | li cdef cydriver.CUcontext cyctx cdef cydriver.CUDA_MEMCPY3D* cycopyParams_ptr cdef cydriver.CUgraphNode* cydependencies = NULL + dependencies = [] if dependencies is None else dependencies cdef cydriver.CUgraph cyhGraph cdef CUgraphNode phGraphNode try: @@ -44267,7 +44279,6 @@ def cuGraphAddMemcpyNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | li else: phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph - dependencies = [] if dependencies is None else dependencies if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") if len(dependencies) > 1: @@ -44421,6 +44432,7 @@ def cuGraphAddMemsetNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | li cdef cydriver.CUcontext cyctx cdef cydriver.CUDA_MEMSET_NODE_PARAMS* cymemsetParams_ptr cdef cydriver.CUgraphNode* cydependencies = NULL + dependencies = [] if dependencies is None else dependencies cdef cydriver.CUgraph cyhGraph cdef CUgraphNode phGraphNode try: @@ -44432,7 +44444,6 @@ def cuGraphAddMemsetNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | li else: phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph - dependencies = [] if dependencies is None else dependencies if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") if len(dependencies) > 1: @@ -44583,6 +44594,7 @@ def cuGraphAddHostNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | list """ cdef cydriver.CUDA_HOST_NODE_PARAMS* cynodeParams_ptr cdef cydriver.CUgraphNode* cydependencies = NULL + dependencies = [] if dependencies is None else dependencies cdef cydriver.CUgraph cyhGraph cdef CUgraphNode phGraphNode try: @@ -44594,7 +44606,6 @@ def cuGraphAddHostNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | list else: phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph - dependencies = [] if dependencies is None else dependencies if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") if len(dependencies) > 1: @@ -44741,6 +44752,7 @@ def cuGraphAddChildGraphNode(hGraph, dependencies : Optional[tuple[CUgraphNode] """ cdef cydriver.CUgraph cychildGraph cdef cydriver.CUgraphNode* cydependencies = NULL + dependencies = [] if dependencies is None else dependencies cdef cydriver.CUgraph cyhGraph cdef CUgraphNode phGraphNode try: @@ -44752,7 +44764,6 @@ def cuGraphAddChildGraphNode(hGraph, dependencies : Optional[tuple[CUgraphNode] else: phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph - dependencies = [] if dependencies is None else dependencies if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") if len(dependencies) > 1: @@ -44868,6 +44879,7 @@ def cuGraphAddEmptyNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | lis :py:obj:`~.cuGraphAddNode`, :py:obj:`~.cuGraphCreate`, :py:obj:`~.cuGraphDestroyNode`, :py:obj:`~.cuGraphAddChildGraphNode`, :py:obj:`~.cuGraphAddKernelNode`, :py:obj:`~.cuGraphAddHostNode`, :py:obj:`~.cuGraphAddMemcpyNode`, :py:obj:`~.cuGraphAddMemsetNode` """ cdef cydriver.CUgraphNode* cydependencies = NULL + dependencies = [] if dependencies is None else dependencies cdef cydriver.CUgraph cyhGraph cdef CUgraphNode phGraphNode try: @@ -44879,7 +44891,6 @@ def cuGraphAddEmptyNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | lis else: phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph - dependencies = [] if dependencies is None else dependencies if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") if len(dependencies) > 1: @@ -44942,6 +44953,7 @@ def cuGraphAddEventRecordNode(hGraph, dependencies : Optional[tuple[CUgraphNode] """ cdef cydriver.CUevent cyevent cdef cydriver.CUgraphNode* cydependencies = NULL + dependencies = [] if dependencies is None else dependencies cdef cydriver.CUgraph cyhGraph cdef CUgraphNode phGraphNode try: @@ -44953,7 +44965,6 @@ def cuGraphAddEventRecordNode(hGraph, dependencies : Optional[tuple[CUgraphNode] else: phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph - dependencies = [] if dependencies is None else dependencies if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") if len(dependencies) > 1: @@ -45111,6 +45122,7 @@ def cuGraphAddEventWaitNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | """ cdef cydriver.CUevent cyevent cdef cydriver.CUgraphNode* cydependencies = NULL + dependencies = [] if dependencies is None else dependencies cdef cydriver.CUgraph cyhGraph cdef CUgraphNode phGraphNode try: @@ -45122,7 +45134,6 @@ def cuGraphAddEventWaitNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | else: phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph - dependencies = [] if dependencies is None else dependencies if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") if len(dependencies) > 1: @@ -45279,6 +45290,7 @@ def cuGraphAddExternalSemaphoresSignalNode(hGraph, dependencies : Optional[tuple """ cdef cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS* cynodeParams_ptr cdef cydriver.CUgraphNode* cydependencies = NULL + dependencies = [] if dependencies is None else dependencies cdef cydriver.CUgraph cyhGraph cdef CUgraphNode phGraphNode try: @@ -45290,7 +45302,6 @@ def cuGraphAddExternalSemaphoresSignalNode(hGraph, dependencies : Optional[tuple else: phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph - dependencies = [] if dependencies is None else dependencies if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") if len(dependencies) > 1: @@ -45442,6 +45453,7 @@ def cuGraphAddExternalSemaphoresWaitNode(hGraph, dependencies : Optional[tuple[C """ cdef cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS* cynodeParams_ptr cdef cydriver.CUgraphNode* cydependencies = NULL + dependencies = [] if dependencies is None else dependencies cdef cydriver.CUgraph cyhGraph cdef CUgraphNode phGraphNode try: @@ -45453,7 +45465,6 @@ def cuGraphAddExternalSemaphoresWaitNode(hGraph, dependencies : Optional[tuple[C else: phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph - dependencies = [] if dependencies is None else dependencies if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") if len(dependencies) > 1: @@ -45608,6 +45619,7 @@ def cuGraphAddBatchMemOpNode(hGraph, dependencies : Optional[tuple[CUgraphNode] """ cdef cydriver.CUDA_BATCH_MEM_OP_NODE_PARAMS* cynodeParams_ptr cdef cydriver.CUgraphNode* cydependencies = NULL + dependencies = [] if dependencies is None else dependencies cdef cydriver.CUgraph cyhGraph cdef CUgraphNode phGraphNode try: @@ -45619,7 +45631,6 @@ def cuGraphAddBatchMemOpNode(hGraph, dependencies : Optional[tuple[CUgraphNode] else: phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph - dependencies = [] if dependencies is None else dependencies if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") if len(dependencies) > 1: @@ -45884,6 +45895,7 @@ def cuGraphAddMemAllocNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | """ cdef cydriver.CUDA_MEM_ALLOC_NODE_PARAMS* cynodeParams_ptr cdef cydriver.CUgraphNode* cydependencies = NULL + dependencies = [] if dependencies is None else dependencies cdef cydriver.CUgraph cyhGraph cdef CUgraphNode phGraphNode try: @@ -45895,7 +45907,6 @@ def cuGraphAddMemAllocNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | else: phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph - dependencies = [] if dependencies is None else dependencies if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") if len(dependencies) > 1: @@ -46021,6 +46032,7 @@ def cuGraphAddMemFreeNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | l """ cdef cydriver.CUdeviceptr cydptr cdef cydriver.CUgraphNode* cydependencies = NULL + dependencies = [] if dependencies is None else dependencies cdef cydriver.CUgraph cyhGraph cdef CUgraphNode phGraphNode try: @@ -46032,7 +46044,6 @@ def cuGraphAddMemFreeNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | l else: phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph - dependencies = [] if dependencies is None else dependencies if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") if len(dependencies) > 1: @@ -47017,8 +47028,11 @@ def cuGraphAddDependencies(hGraph, from_ : Optional[tuple[CUgraphNode] | list[CU :py:obj:`~.cuGraphRemoveDependencies`, :py:obj:`~.cuGraphGetEdges`, :py:obj:`~.cuGraphNodeGetDependencies`, :py:obj:`~.cuGraphNodeGetDependentNodes` """ cdef cydriver.CUgraphEdgeData* cyedgeData = NULL + edgeData = [] if edgeData is None else edgeData cdef cydriver.CUgraphNode* cyto = NULL + to = [] if to is None else to cdef cydriver.CUgraphNode* cyfrom_ = NULL + from_ = [] if from_ is None else from_ cdef cydriver.CUgraph cyhGraph try: if hGraph is None: @@ -47028,7 +47042,6 @@ def cuGraphAddDependencies(hGraph, from_ : Optional[tuple[CUgraphNode] | list[CU else: phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph - from_ = [] if from_ is None else from_ if not all(isinstance(_x, (CUgraphNode,)) for _x in from_): raise TypeError("Argument 'from_' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") if len(from_) > 1: @@ -47040,7 +47053,6 @@ def cuGraphAddDependencies(hGraph, from_ : Optional[tuple[CUgraphNode] | list[CU cyfrom_[idx] = (from_[idx])._pvt_ptr[0] elif len(from_) == 1: cyfrom_ = (from_[0])._pvt_ptr - to = [] if to is None else to if not all(isinstance(_x, (CUgraphNode,)) for _x in to): raise TypeError("Argument 'to' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") if len(to) > 1: @@ -47052,7 +47064,6 @@ def cuGraphAddDependencies(hGraph, from_ : Optional[tuple[CUgraphNode] | list[CU cyto[idx] = (to[idx])._pvt_ptr[0] elif len(to) == 1: cyto = (to[0])._pvt_ptr - edgeData = [] if edgeData is None else edgeData if not all(isinstance(_x, (CUgraphEdgeData,)) for _x in edgeData): raise TypeError("Argument 'edgeData' is not instance of type (expected tuple[cydriver.CUgraphEdgeData,] or list[cydriver.CUgraphEdgeData,]") if len(edgeData) > 1: @@ -47118,8 +47129,11 @@ def cuGraphRemoveDependencies(hGraph, from_ : Optional[tuple[CUgraphNode] | list :py:obj:`~.cuGraphAddDependencies`, :py:obj:`~.cuGraphGetEdges`, :py:obj:`~.cuGraphNodeGetDependencies`, :py:obj:`~.cuGraphNodeGetDependentNodes` """ cdef cydriver.CUgraphEdgeData* cyedgeData = NULL + edgeData = [] if edgeData is None else edgeData cdef cydriver.CUgraphNode* cyto = NULL + to = [] if to is None else to cdef cydriver.CUgraphNode* cyfrom_ = NULL + from_ = [] if from_ is None else from_ cdef cydriver.CUgraph cyhGraph try: if hGraph is None: @@ -47129,7 +47143,6 @@ def cuGraphRemoveDependencies(hGraph, from_ : Optional[tuple[CUgraphNode] | list else: phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph - from_ = [] if from_ is None else from_ if not all(isinstance(_x, (CUgraphNode,)) for _x in from_): raise TypeError("Argument 'from_' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") if len(from_) > 1: @@ -47141,7 +47154,6 @@ def cuGraphRemoveDependencies(hGraph, from_ : Optional[tuple[CUgraphNode] | list cyfrom_[idx] = (from_[idx])._pvt_ptr[0] elif len(from_) == 1: cyfrom_ = (from_[0])._pvt_ptr - to = [] if to is None else to if not all(isinstance(_x, (CUgraphNode,)) for _x in to): raise TypeError("Argument 'to' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") if len(to) > 1: @@ -47153,7 +47165,6 @@ def cuGraphRemoveDependencies(hGraph, from_ : Optional[tuple[CUgraphNode] | list cyto[idx] = (to[idx])._pvt_ptr[0] elif len(to) == 1: cyto = (to[0])._pvt_ptr - edgeData = [] if edgeData is None else edgeData if not all(isinstance(_x, (CUgraphEdgeData,)) for _x in edgeData): raise TypeError("Argument 'edgeData' is not instance of type (expected tuple[cydriver.CUgraphEdgeData,] or list[cydriver.CUgraphEdgeData,]") if len(edgeData) > 1: @@ -49102,7 +49113,9 @@ def cuGraphAddNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | list[CUg """ cdef cydriver.CUgraphNodeParams* cynodeParams_ptr cdef cydriver.CUgraphEdgeData* cydependencyData = NULL + dependencyData = [] if dependencyData is None else dependencyData cdef cydriver.CUgraphNode* cydependencies = NULL + dependencies = [] if dependencies is None else dependencies cdef cydriver.CUgraph cyhGraph cdef CUgraphNode phGraphNode try: @@ -49114,7 +49127,6 @@ def cuGraphAddNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | list[CUg else: phGraph = int(CUgraph(hGraph)) cyhGraph = phGraph - dependencies = [] if dependencies is None else dependencies if not all(isinstance(_x, (CUgraphNode,)) for _x in dependencies): raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cydriver.CUgraphNode,] or list[cydriver.CUgraphNode,]") if len(dependencies) > 1: @@ -49126,7 +49138,6 @@ def cuGraphAddNode(hGraph, dependencies : Optional[tuple[CUgraphNode] | list[CUg cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] elif len(dependencies) == 1: cydependencies = (dependencies[0])._pvt_ptr - dependencyData = [] if dependencyData is None else dependencyData if not all(isinstance(_x, (CUgraphEdgeData,)) for _x in dependencyData): raise TypeError("Argument 'dependencyData' is not instance of type (expected tuple[cydriver.CUgraphEdgeData,] or list[cydriver.CUgraphEdgeData,]") if len(dependencyData) > 1: @@ -52301,7 +52312,9 @@ def cuTensorMapEncodeIm2col(tensorDataType not None : CUtensorMapDataType, tenso cdef cydriver.cuuint32_t cypixelsPerColumn cdef cydriver.cuuint32_t cychannelsPerPixel cdef vector[int] cypixelBoxUpperCorner + pixelBoxUpperCorner = [] if pixelBoxUpperCorner is None else pixelBoxUpperCorner cdef vector[int] cypixelBoxLowerCorner + pixelBoxLowerCorner = [] if pixelBoxLowerCorner is None else pixelBoxLowerCorner cdef cydriver.cuuint64_t* cyglobalStrides cdef size_t globalStridesLen cdef cydriver.cuuint64_t[5] globalStridesStatic @@ -52346,11 +52359,9 @@ def cuTensorMapEncodeIm2col(tensorDataType not None : CUtensorMapDataType, tenso cyglobalStrides = globalStridesStatic else: raise ValueError("Argument 'globalStrides' too long, must be <= 5") - pixelBoxLowerCorner = [] if pixelBoxLowerCorner is None else pixelBoxLowerCorner if not all(isinstance(_x, (int)) for _x in pixelBoxLowerCorner): raise TypeError("Argument 'pixelBoxLowerCorner' is not instance of type (expected tuple[int] or list[int]") cypixelBoxLowerCorner = pixelBoxLowerCorner - pixelBoxUpperCorner = [] if pixelBoxUpperCorner is None else pixelBoxUpperCorner if not all(isinstance(_x, (int)) for _x in pixelBoxUpperCorner): raise TypeError("Argument 'pixelBoxUpperCorner' is not instance of type (expected tuple[int] or list[int]") cypixelBoxUpperCorner = pixelBoxUpperCorner @@ -53063,7 +53074,9 @@ def cuDeviceGetP2PAtomicCapabilities(operations : Optional[tuple[CUatomicOperati """ cdef cydriver.CUdevice cydstDevice cdef cydriver.CUdevice cysrcDevice + cdef vector[cydriver.CUatomicOperation] cyoperations + operations = [] if operations is None else operations cdef unsigned int* cycapabilities = NULL pycapabilities = [] try: @@ -53071,7 +53084,6 @@ def cuDeviceGetP2PAtomicCapabilities(operations : Optional[tuple[CUatomicOperati cycapabilities = calloc(count, sizeof(unsigned int)) if cycapabilities is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(count) + 'x' + str(sizeof(unsigned int))) - operations = [] if operations is None else operations if not all(isinstance(_x, (CUatomicOperation)) for _x in operations): raise TypeError("Argument 'operations' is not instance of type (expected tuple[cydriver.CUatomicOperation] or list[cydriver.CUatomicOperation]") cyoperations = operations @@ -54771,10 +54783,10 @@ def cuDevResourceGenerateDesc(resources : Optional[tuple[CUdevResource] | list[C :py:obj:`~.cuDevSmResourceSplitByCount` """ cdef cydriver.CUdevResource* cyresources = NULL + resources = [] if resources is None else resources cdef CUdevResourceDesc phDesc try: phDesc = CUdevResourceDesc() - resources = [] if resources is None else resources if not all(isinstance(_x, (CUdevResource,)) for _x in resources): raise TypeError("Argument 'resources' is not instance of type (expected tuple[cydriver.CUdevResource,] or list[cydriver.CUdevResource,]") if len(resources) > 1: diff --git a/cuda_bindings/cuda/bindings/nvrtc.pyx.in b/cuda_bindings/cuda/bindings/nvrtc.pyx.in index 7af23ccfdd..4ede62e586 100644 --- a/cuda_bindings/cuda/bindings/nvrtc.pyx.in +++ b/cuda_bindings/cuda/bindings/nvrtc.pyx.in @@ -274,16 +274,16 @@ def nvrtcCreateProgram(char* src, char* name, int numHeaders, headers : Optional :py:obj:`~.nvrtcDestroyProgram` """ cdef vector[const char*] cyincludeNames + includeNames = [] if includeNames is None else includeNames cdef vector[const char*] cyheaders + headers = [] if headers is None else headers cdef nvrtcProgram prog prog = nvrtcProgram() if numHeaders > len(headers): raise RuntimeError("List is too small: " + str(len(headers)) + " < " + str(numHeaders)) if numHeaders > len(includeNames): raise RuntimeError("List is too small: " + str(len(includeNames)) + " < " + str(numHeaders)) - headers = [] if headers is None else headers if not all(isinstance(_x, (bytes)) for _x in headers): raise TypeError("Argument 'headers' is not instance of type (expected tuple[bytes] or list[bytes]") cyheaders = headers - includeNames = [] if includeNames is None else includeNames if not all(isinstance(_x, (bytes)) for _x in includeNames): raise TypeError("Argument 'includeNames' is not instance of type (expected tuple[bytes] or list[bytes]") cyincludeNames = includeNames @@ -363,6 +363,7 @@ def nvrtcCompileProgram(prog, int numOptions, options : Optional[tuple[bytes] | - :py:obj:`~.NVRTC_ERROR_CANCELLED` """ cdef vector[const char*] cyoptions + options = [] if options is None else options cdef cynvrtc.nvrtcProgram cyprog if prog is None: pprog = 0 @@ -372,7 +373,6 @@ def nvrtcCompileProgram(prog, int numOptions, options : Optional[tuple[bytes] | pprog = int(nvrtcProgram(prog)) cyprog = pprog if numOptions > len(options): raise RuntimeError("List is too small: " + str(len(options)) + " < " + str(numOptions)) - options = [] if options is None else options if not all(isinstance(_x, (bytes)) for _x in options): raise TypeError("Argument 'options' is not instance of type (expected tuple[bytes] or list[bytes]") cyoptions = options diff --git a/cuda_bindings/cuda/bindings/runtime.pyx.in b/cuda_bindings/cuda/bindings/runtime.pyx.in index 7c0caf0be9..d77e7ee16f 100644 --- a/cuda_bindings/cuda/bindings/runtime.pyx.in +++ b/cuda_bindings/cuda/bindings/runtime.pyx.in @@ -21895,7 +21895,9 @@ def cudaDeviceGetHostAtomicCapabilities(operations : Optional[tuple[cudaAtomicOp -------- :py:obj:`~.cudaDeviceGetAttribute`, :py:obj:`~.cudaDeviceGetP2PAtomicCapabilities`, :py:obj:`~.cuDeviceGeHostAtomicCapabilities` """ + cdef vector[cyruntime.cudaAtomicOperation] cyoperations + operations = [] if operations is None else operations cdef unsigned int* cycapabilities = NULL pycapabilities = [] try: @@ -21903,7 +21905,6 @@ def cudaDeviceGetHostAtomicCapabilities(operations : Optional[tuple[cudaAtomicOp cycapabilities = calloc(count, sizeof(unsigned int)) if cycapabilities is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(count) + 'x' + str(sizeof(unsigned int))) - operations = [] if operations is None else operations if not all(isinstance(_x, (cudaAtomicOperation)) for _x in operations): raise TypeError("Argument 'operations' is not instance of type (expected tuple[cyruntime.cudaAtomicOperation] or list[cyruntime.cudaAtomicOperation]") cyoperations = operations @@ -22238,7 +22239,9 @@ def cudaDeviceGetP2PAtomicCapabilities(operations : Optional[tuple[cudaAtomicOpe -------- :py:obj:`~.cudaDeviceGetP2PAttribute`, :py:obj:`~.cuDeviceGetP2PAttribute`, :py:obj:`~.cuDeviceGetP2PAtomicCapabilities` """ + cdef vector[cyruntime.cudaAtomicOperation] cyoperations + operations = [] if operations is None else operations cdef unsigned int* cycapabilities = NULL pycapabilities = [] try: @@ -22246,7 +22249,6 @@ def cudaDeviceGetP2PAtomicCapabilities(operations : Optional[tuple[cudaAtomicOpe cycapabilities = calloc(count, sizeof(unsigned int)) if cycapabilities is NULL: raise MemoryError('Failed to allocate length x size memory: ' + str(count) + 'x' + str(sizeof(unsigned int))) - operations = [] if operations is None else operations if not all(isinstance(_x, (cudaAtomicOperation)) for _x in operations): raise TypeError("Argument 'operations' is not instance of type (expected tuple[cyruntime.cudaAtomicOperation] or list[cyruntime.cudaAtomicOperation]") cyoperations = operations @@ -23569,7 +23571,9 @@ def cudaStreamBeginCaptureToGraph(stream, graph, dependencies : Optional[tuple[c """ cdef cyruntime.cudaStreamCaptureMode cymode cdef cyruntime.cudaGraphEdgeData* cydependencyData = NULL + dependencyData = [] if dependencyData is None else dependencyData cdef cyruntime.cudaGraphNode_t* cydependencies = NULL + dependencies = [] if dependencies is None else dependencies cdef cyruntime.cudaGraph_t cygraph cdef cyruntime.cudaStream_t cystream try: @@ -23587,7 +23591,6 @@ def cudaStreamBeginCaptureToGraph(stream, graph, dependencies : Optional[tuple[c else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - dependencies = [] if dependencies is None else dependencies if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in dependencies): raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") if len(dependencies) > 1: @@ -23599,7 +23602,6 @@ def cudaStreamBeginCaptureToGraph(stream, graph, dependencies : Optional[tuple[c cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] elif len(dependencies) == 1: cydependencies = (dependencies[0])._pvt_ptr - dependencyData = [] if dependencyData is None else dependencyData if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in dependencyData): raise TypeError("Argument 'dependencyData' is not instance of type (expected tuple[cyruntime.cudaGraphEdgeData,] or list[cyruntime.cudaGraphEdgeData,]") if len(dependencyData) > 1: @@ -23957,7 +23959,9 @@ def cudaStreamUpdateCaptureDependencies(stream, dependencies : Optional[tuple[cu :py:obj:`~.cudaStreamBeginCapture`, :py:obj:`~.cudaStreamGetCaptureInfo`, """ cdef cyruntime.cudaGraphEdgeData* cydependencyData = NULL + dependencyData = [] if dependencyData is None else dependencyData cdef cyruntime.cudaGraphNode_t* cydependencies = NULL + dependencies = [] if dependencies is None else dependencies cdef cyruntime.cudaStream_t cystream try: if stream is None: @@ -23967,7 +23971,6 @@ def cudaStreamUpdateCaptureDependencies(stream, dependencies : Optional[tuple[cu else: pstream = int(cudaStream_t(stream)) cystream = pstream - dependencies = [] if dependencies is None else dependencies if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in dependencies): raise TypeError("Argument 'dependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") if len(dependencies) > 1: @@ -23979,7 +23982,6 @@ def cudaStreamUpdateCaptureDependencies(stream, dependencies : Optional[tuple[cu cydependencies[idx] = (dependencies[idx])._pvt_ptr[0] elif len(dependencies) == 1: cydependencies = (dependencies[0])._pvt_ptr - dependencyData = [] if dependencyData is None else dependencyData if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in dependencyData): raise TypeError("Argument 'dependencyData' is not instance of type (expected tuple[cyruntime.cudaGraphEdgeData,] or list[cyruntime.cudaGraphEdgeData,]") if len(dependencyData) > 1: @@ -25014,9 +25016,10 @@ def cudaSignalExternalSemaphoresAsync(extSemArray : Optional[tuple[cudaExternalS """ cdef cyruntime.cudaStream_t cystream cdef cyruntime.cudaExternalSemaphoreSignalParams* cyparamsArray = NULL + paramsArray = [] if paramsArray is None else paramsArray cdef cyruntime.cudaExternalSemaphore_t* cyextSemArray = NULL + extSemArray = [] if extSemArray is None else extSemArray try: - extSemArray = [] if extSemArray is None else extSemArray if not all(isinstance(_x, (cudaExternalSemaphore_t,)) for _x in extSemArray): raise TypeError("Argument 'extSemArray' is not instance of type (expected tuple[cyruntime.cudaExternalSemaphore_t,] or list[cyruntime.cudaExternalSemaphore_t,]") if len(extSemArray) > 1: @@ -25028,7 +25031,6 @@ def cudaSignalExternalSemaphoresAsync(extSemArray : Optional[tuple[cudaExternalS cyextSemArray[idx] = (extSemArray[idx])._pvt_ptr[0] elif len(extSemArray) == 1: cyextSemArray = (extSemArray[0])._pvt_ptr - paramsArray = [] if paramsArray is None else paramsArray if not all(isinstance(_x, (cudaExternalSemaphoreSignalParams,)) for _x in paramsArray): raise TypeError("Argument 'paramsArray' is not instance of type (expected tuple[cyruntime.cudaExternalSemaphoreSignalParams,] or list[cyruntime.cudaExternalSemaphoreSignalParams,]") if len(paramsArray) > 1: @@ -25145,9 +25147,10 @@ def cudaWaitExternalSemaphoresAsync(extSemArray : Optional[tuple[cudaExternalSem """ cdef cyruntime.cudaStream_t cystream cdef cyruntime.cudaExternalSemaphoreWaitParams* cyparamsArray = NULL + paramsArray = [] if paramsArray is None else paramsArray cdef cyruntime.cudaExternalSemaphore_t* cyextSemArray = NULL + extSemArray = [] if extSemArray is None else extSemArray try: - extSemArray = [] if extSemArray is None else extSemArray if not all(isinstance(_x, (cudaExternalSemaphore_t,)) for _x in extSemArray): raise TypeError("Argument 'extSemArray' is not instance of type (expected tuple[cyruntime.cudaExternalSemaphore_t,] or list[cyruntime.cudaExternalSemaphore_t,]") if len(extSemArray) > 1: @@ -25159,7 +25162,6 @@ def cudaWaitExternalSemaphoresAsync(extSemArray : Optional[tuple[cudaExternalSem cyextSemArray[idx] = (extSemArray[idx])._pvt_ptr[0] elif len(extSemArray) == 1: cyextSemArray = (extSemArray[0])._pvt_ptr - paramsArray = [] if paramsArray is None else paramsArray if not all(isinstance(_x, (cudaExternalSemaphoreWaitParams,)) for _x in paramsArray): raise TypeError("Argument 'paramsArray' is not instance of type (expected tuple[cyruntime.cudaExternalSemaphoreWaitParams,] or list[cyruntime.cudaExternalSemaphoreWaitParams,]") if len(paramsArray) > 1: @@ -28272,17 +28274,18 @@ def cudaMemcpyBatchAsync(dsts : Optional[tuple[Any] | list[Any]], srcs : Optiona cdef cyruntime.cudaStream_t cystream cdef vector[size_t] cyattrsIdxs cdef cyruntime.cudaMemcpyAttributes* cyattrs = NULL + attrs = [] if attrs is None else attrs cdef vector[size_t] cysizes cdef _InputVoidPtrPtrHelper voidStarHelpersrcs cdef const void** cysrcs_ptr + srcs = [] if srcs is None else srcs cdef _InputVoidPtrPtrHelper voidStarHelperdsts cdef const void** cydsts_ptr + dsts = [] if dsts is None else dsts try: - dsts = [] if dsts is None else dsts pylist = [_HelperInputVoidPtr(pydsts) for pydsts in dsts] voidStarHelperdsts = _InputVoidPtrPtrHelper(pylist) cydsts_ptr = voidStarHelperdsts.cptr - srcs = [] if srcs is None else srcs pylist = [_HelperInputVoidPtr(pysrcs) for pysrcs in srcs] voidStarHelpersrcs = _InputVoidPtrPtrHelper(pylist) cysrcs_ptr = voidStarHelpersrcs.cptr @@ -28292,7 +28295,6 @@ def cudaMemcpyBatchAsync(dsts : Optional[tuple[Any] | list[Any]], srcs : Optiona if count > len(dsts): raise RuntimeError("List is too small: " + str(len(dsts)) + " < " + str(count)) if count > len(srcs): raise RuntimeError("List is too small: " + str(len(srcs)) + " < " + str(count)) if count > len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count)) - attrs = [] if attrs is None else attrs if not all(isinstance(_x, (cudaMemcpyAttributes,)) for _x in attrs): raise TypeError("Argument 'attrs' is not instance of type (expected tuple[cyruntime.cudaMemcpyAttributes,] or list[cyruntime.cudaMemcpyAttributes,]") if len(attrs) > 1: @@ -28427,9 +28429,9 @@ def cudaMemcpy3DBatchAsync(size_t numOps, opList : Optional[tuple[cudaMemcpy3DBa """ cdef cyruntime.cudaStream_t cystream cdef cyruntime.cudaMemcpy3DBatchOp* cyopList = NULL + opList = [] if opList is None else opList try: if numOps > len(opList): raise RuntimeError("List is too small: " + str(len(opList)) + " < " + str(numOps)) - opList = [] if opList is None else opList if not all(isinstance(_x, (cudaMemcpy3DBatchOp,)) for _x in opList): raise TypeError("Argument 'opList' is not instance of type (expected tuple[cyruntime.cudaMemcpy3DBatchOp,] or list[cyruntime.cudaMemcpy3DBatchOp,]") if len(opList) > 1: @@ -29255,11 +29257,12 @@ def cudaMemPrefetchBatchAsync(dptrs : Optional[tuple[Any] | list[Any]], sizes : cdef cyruntime.cudaStream_t cystream cdef vector[size_t] cyprefetchLocIdxs cdef cyruntime.cudaMemLocation* cyprefetchLocs = NULL + prefetchLocs = [] if prefetchLocs is None else prefetchLocs cdef vector[size_t] cysizes cdef _InputVoidPtrPtrHelper voidStarHelperdptrs cdef void** cydptrs_ptr + dptrs = [] if dptrs is None else dptrs try: - dptrs = [] if dptrs is None else dptrs pylist = [_HelperInputVoidPtr(pydptrs) for pydptrs in dptrs] voidStarHelperdptrs = _InputVoidPtrPtrHelper(pylist) cydptrs_ptr = voidStarHelperdptrs.cptr @@ -29268,7 +29271,6 @@ def cudaMemPrefetchBatchAsync(dptrs : Optional[tuple[Any] | list[Any]], sizes : cysizes = sizes if count > len(dptrs): raise RuntimeError("List is too small: " + str(len(dptrs)) + " < " + str(count)) if count > len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count)) - prefetchLocs = [] if prefetchLocs is None else prefetchLocs if not all(isinstance(_x, (cudaMemLocation,)) for _x in prefetchLocs): raise TypeError("Argument 'prefetchLocs' is not instance of type (expected tuple[cyruntime.cudaMemLocation,] or list[cyruntime.cudaMemLocation,]") if len(prefetchLocs) > 1: @@ -29455,11 +29457,12 @@ def cudaMemDiscardAndPrefetchBatchAsync(dptrs : Optional[tuple[Any] | list[Any]] cdef cyruntime.cudaStream_t cystream cdef vector[size_t] cyprefetchLocIdxs cdef cyruntime.cudaMemLocation* cyprefetchLocs = NULL + prefetchLocs = [] if prefetchLocs is None else prefetchLocs cdef vector[size_t] cysizes cdef _InputVoidPtrPtrHelper voidStarHelperdptrs cdef void** cydptrs_ptr + dptrs = [] if dptrs is None else dptrs try: - dptrs = [] if dptrs is None else dptrs pylist = [_HelperInputVoidPtr(pydptrs) for pydptrs in dptrs] voidStarHelperdptrs = _InputVoidPtrPtrHelper(pylist) cydptrs_ptr = voidStarHelperdptrs.cptr @@ -29468,7 +29471,6 @@ def cudaMemDiscardAndPrefetchBatchAsync(dptrs : Optional[tuple[Any] | list[Any]] cysizes = sizes if count > len(dptrs): raise RuntimeError("List is too small: " + str(len(dptrs)) + " < " + str(count)) if count > len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count)) - prefetchLocs = [] if prefetchLocs is None else prefetchLocs if not all(isinstance(_x, (cudaMemLocation,)) for _x in prefetchLocs): raise TypeError("Argument 'prefetchLocs' is not instance of type (expected tuple[cyruntime.cudaMemLocation,] or list[cyruntime.cudaMemLocation,]") if len(prefetchLocs) > 1: @@ -29920,7 +29922,9 @@ def cudaMemRangeGetAttributes(dataSizes : tuple[int] | list[int], attributes : O """ cdef _HelperInputVoidPtrStruct cydevPtrHelper cdef void* cydevPtr + cdef vector[cyruntime.cudaMemRangeAttribute] cyattributes + attributes = [] if attributes is None else attributes cdef vector[size_t] cydataSizes cdef _InputVoidPtrPtrHelper voidStarHelperdata cdef void** cyvoidStarHelper_ptr @@ -29931,7 +29935,6 @@ def cudaMemRangeGetAttributes(dataSizes : tuple[int] | list[int], attributes : O if not all(isinstance(_x, (int)) for _x in dataSizes): raise TypeError("Argument 'dataSizes' is not instance of type (expected tuple[int] or list[int]") cydataSizes = dataSizes - attributes = [] if attributes is None else attributes if not all(isinstance(_x, (cudaMemRangeAttribute)) for _x in attributes): raise TypeError("Argument 'attributes' is not instance of type (expected tuple[cyruntime.cudaMemRangeAttribute] or list[cyruntime.cudaMemRangeAttribute]") cyattributes = attributes @@ -30655,6 +30658,7 @@ def cudaMemPoolSetAccess(memPool, descList : Optional[tuple[cudaMemAccessDesc] | :py:obj:`~.cuMemPoolSetAccess`, :py:obj:`~.cudaMemPoolGetAccess`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync` """ cdef cyruntime.cudaMemAccessDesc* cydescList = NULL + descList = [] if descList is None else descList cdef cyruntime.cudaMemPool_t cymemPool try: if memPool is None: @@ -30664,7 +30668,6 @@ def cudaMemPoolSetAccess(memPool, descList : Optional[tuple[cudaMemAccessDesc] | else: pmemPool = int(cudaMemPool_t(memPool)) cymemPool = pmemPool - descList = [] if descList is None else descList if not all(isinstance(_x, (cudaMemAccessDesc,)) for _x in descList): raise TypeError("Argument 'descList' is not instance of type (expected tuple[cyruntime.cudaMemAccessDesc,] or list[cyruntime.cudaMemAccessDesc,]") if len(descList) > 1: @@ -32901,6 +32904,7 @@ def cudaGraphAddKernelNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t """ cdef cyruntime.cudaKernelNodeParams* cypNodeParams_ptr cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL + pDependencies = [] if pDependencies is None else pDependencies cdef cyruntime.cudaGraph_t cygraph cdef cudaGraphNode_t pGraphNode try: @@ -32912,7 +32916,6 @@ def cudaGraphAddKernelNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - pDependencies = [] if pDependencies is None else pDependencies if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") if len(pDependencies) > 1: @@ -33207,6 +33210,7 @@ def cudaGraphAddMemcpyNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t """ cdef cyruntime.cudaMemcpy3DParms* cypCopyParams_ptr cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL + pDependencies = [] if pDependencies is None else pDependencies cdef cyruntime.cudaGraph_t cygraph cdef cudaGraphNode_t pGraphNode try: @@ -33218,7 +33222,6 @@ def cudaGraphAddMemcpyNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - pDependencies = [] if pDependencies is None else pDependencies if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") if len(pDependencies) > 1: @@ -33307,6 +33310,7 @@ def cudaGraphAddMemcpyNode1D(graph, pDependencies : Optional[tuple[cudaGraphNode cdef _HelperInputVoidPtrStruct cydstHelper cdef void* cydst cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL + pDependencies = [] if pDependencies is None else pDependencies cdef cyruntime.cudaGraph_t cygraph cdef cudaGraphNode_t pGraphNode try: @@ -33318,7 +33322,6 @@ def cudaGraphAddMemcpyNode1D(graph, pDependencies : Optional[tuple[cudaGraphNode else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - pDependencies = [] if pDependencies is None else pDependencies if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") if len(pDependencies) > 1: @@ -33533,6 +33536,7 @@ def cudaGraphAddMemsetNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t """ cdef cyruntime.cudaMemsetParams* cypMemsetParams_ptr cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL + pDependencies = [] if pDependencies is None else pDependencies cdef cyruntime.cudaGraph_t cygraph cdef cudaGraphNode_t pGraphNode try: @@ -33544,7 +33548,6 @@ def cudaGraphAddMemsetNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - pDependencies = [] if pDependencies is None else pDependencies if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") if len(pDependencies) > 1: @@ -33688,6 +33691,7 @@ def cudaGraphAddHostNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] """ cdef cyruntime.cudaHostNodeParams* cypNodeParams_ptr cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL + pDependencies = [] if pDependencies is None else pDependencies cdef cyruntime.cudaGraph_t cygraph cdef cudaGraphNode_t pGraphNode try: @@ -33699,7 +33703,6 @@ def cudaGraphAddHostNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - pDependencies = [] if pDependencies is None else pDependencies if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") if len(pDependencies) > 1: @@ -33846,6 +33849,7 @@ def cudaGraphAddChildGraphNode(graph, pDependencies : Optional[tuple[cudaGraphNo """ cdef cyruntime.cudaGraph_t cychildGraph cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL + pDependencies = [] if pDependencies is None else pDependencies cdef cyruntime.cudaGraph_t cygraph cdef cudaGraphNode_t pGraphNode try: @@ -33857,7 +33861,6 @@ def cudaGraphAddChildGraphNode(graph, pDependencies : Optional[tuple[cudaGraphNo else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - pDependencies = [] if pDependencies is None else pDependencies if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") if len(pDependencies) > 1: @@ -33973,6 +33976,7 @@ def cudaGraphAddEmptyNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode` """ cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL + pDependencies = [] if pDependencies is None else pDependencies cdef cyruntime.cudaGraph_t cygraph cdef cudaGraphNode_t pGraphNode try: @@ -33984,7 +33988,6 @@ def cudaGraphAddEmptyNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - pDependencies = [] if pDependencies is None else pDependencies if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") if len(pDependencies) > 1: @@ -34049,6 +34052,7 @@ def cudaGraphAddEventRecordNode(graph, pDependencies : Optional[tuple[cudaGraphN """ cdef cyruntime.cudaEvent_t cyevent cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL + pDependencies = [] if pDependencies is None else pDependencies cdef cyruntime.cudaGraph_t cygraph cdef cudaGraphNode_t pGraphNode try: @@ -34060,7 +34064,6 @@ def cudaGraphAddEventRecordNode(graph, pDependencies : Optional[tuple[cudaGraphN else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - pDependencies = [] if pDependencies is None else pDependencies if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") if len(pDependencies) > 1: @@ -34221,6 +34224,7 @@ def cudaGraphAddEventWaitNode(graph, pDependencies : Optional[tuple[cudaGraphNod """ cdef cyruntime.cudaEvent_t cyevent cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL + pDependencies = [] if pDependencies is None else pDependencies cdef cyruntime.cudaGraph_t cygraph cdef cudaGraphNode_t pGraphNode try: @@ -34232,7 +34236,6 @@ def cudaGraphAddEventWaitNode(graph, pDependencies : Optional[tuple[cudaGraphNod else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - pDependencies = [] if pDependencies is None else pDependencies if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") if len(pDependencies) > 1: @@ -34389,6 +34392,7 @@ def cudaGraphAddExternalSemaphoresSignalNode(graph, pDependencies : Optional[tup """ cdef cyruntime.cudaExternalSemaphoreSignalNodeParams* cynodeParams_ptr cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL + pDependencies = [] if pDependencies is None else pDependencies cdef cyruntime.cudaGraph_t cygraph cdef cudaGraphNode_t pGraphNode try: @@ -34400,7 +34404,6 @@ def cudaGraphAddExternalSemaphoresSignalNode(graph, pDependencies : Optional[tup else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - pDependencies = [] if pDependencies is None else pDependencies if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") if len(pDependencies) > 1: @@ -34552,6 +34555,7 @@ def cudaGraphAddExternalSemaphoresWaitNode(graph, pDependencies : Optional[tuple """ cdef cyruntime.cudaExternalSemaphoreWaitNodeParams* cynodeParams_ptr cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL + pDependencies = [] if pDependencies is None else pDependencies cdef cyruntime.cudaGraph_t cygraph cdef cudaGraphNode_t pGraphNode try: @@ -34563,7 +34567,6 @@ def cudaGraphAddExternalSemaphoresWaitNode(graph, pDependencies : Optional[tuple else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - pDependencies = [] if pDependencies is None else pDependencies if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") if len(pDependencies) > 1: @@ -34754,6 +34757,7 @@ def cudaGraphAddMemAllocNode(graph, pDependencies : Optional[tuple[cudaGraphNode """ cdef cyruntime.cudaMemAllocNodeParams* cynodeParams_ptr cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL + pDependencies = [] if pDependencies is None else pDependencies cdef cyruntime.cudaGraph_t cygraph cdef cudaGraphNode_t pGraphNode try: @@ -34765,7 +34769,6 @@ def cudaGraphAddMemAllocNode(graph, pDependencies : Optional[tuple[cudaGraphNode else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - pDependencies = [] if pDependencies is None else pDependencies if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") if len(pDependencies) > 1: @@ -34892,6 +34895,7 @@ def cudaGraphAddMemFreeNode(graph, pDependencies : Optional[tuple[cudaGraphNode_ cdef _HelperInputVoidPtrStruct cydptrHelper cdef void* cydptr cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL + pDependencies = [] if pDependencies is None else pDependencies cdef cyruntime.cudaGraph_t cygraph cdef cudaGraphNode_t pGraphNode try: @@ -34903,7 +34907,6 @@ def cudaGraphAddMemFreeNode(graph, pDependencies : Optional[tuple[cudaGraphNode_ else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - pDependencies = [] if pDependencies is None else pDependencies if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") if len(pDependencies) > 1: @@ -35859,8 +35862,11 @@ def cudaGraphAddDependencies(graph, from_ : Optional[tuple[cudaGraphNode_t] | li :py:obj:`~.cudaGraphRemoveDependencies`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphNodeGetDependentNodes` """ cdef cyruntime.cudaGraphEdgeData* cyedgeData = NULL + edgeData = [] if edgeData is None else edgeData cdef cyruntime.cudaGraphNode_t* cyto = NULL + to = [] if to is None else to cdef cyruntime.cudaGraphNode_t* cyfrom_ = NULL + from_ = [] if from_ is None else from_ cdef cyruntime.cudaGraph_t cygraph try: if graph is None: @@ -35870,7 +35876,6 @@ def cudaGraphAddDependencies(graph, from_ : Optional[tuple[cudaGraphNode_t] | li else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - from_ = [] if from_ is None else from_ if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in from_): raise TypeError("Argument 'from_' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") if len(from_) > 1: @@ -35882,7 +35887,6 @@ def cudaGraphAddDependencies(graph, from_ : Optional[tuple[cudaGraphNode_t] | li cyfrom_[idx] = (from_[idx])._pvt_ptr[0] elif len(from_) == 1: cyfrom_ = (from_[0])._pvt_ptr - to = [] if to is None else to if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in to): raise TypeError("Argument 'to' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") if len(to) > 1: @@ -35894,7 +35898,6 @@ def cudaGraphAddDependencies(graph, from_ : Optional[tuple[cudaGraphNode_t] | li cyto[idx] = (to[idx])._pvt_ptr[0] elif len(to) == 1: cyto = (to[0])._pvt_ptr - edgeData = [] if edgeData is None else edgeData if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in edgeData): raise TypeError("Argument 'edgeData' is not instance of type (expected tuple[cyruntime.cudaGraphEdgeData,] or list[cyruntime.cudaGraphEdgeData,]") if len(edgeData) > 1: @@ -35957,8 +35960,11 @@ def cudaGraphRemoveDependencies(graph, from_ : Optional[tuple[cudaGraphNode_t] | :py:obj:`~.cudaGraphAddDependencies`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphNodeGetDependentNodes` """ cdef cyruntime.cudaGraphEdgeData* cyedgeData = NULL + edgeData = [] if edgeData is None else edgeData cdef cyruntime.cudaGraphNode_t* cyto = NULL + to = [] if to is None else to cdef cyruntime.cudaGraphNode_t* cyfrom_ = NULL + from_ = [] if from_ is None else from_ cdef cyruntime.cudaGraph_t cygraph try: if graph is None: @@ -35968,7 +35974,6 @@ def cudaGraphRemoveDependencies(graph, from_ : Optional[tuple[cudaGraphNode_t] | else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - from_ = [] if from_ is None else from_ if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in from_): raise TypeError("Argument 'from_' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") if len(from_) > 1: @@ -35980,7 +35985,6 @@ def cudaGraphRemoveDependencies(graph, from_ : Optional[tuple[cudaGraphNode_t] | cyfrom_[idx] = (from_[idx])._pvt_ptr[0] elif len(from_) == 1: cyfrom_ = (from_[0])._pvt_ptr - to = [] if to is None else to if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in to): raise TypeError("Argument 'to' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") if len(to) > 1: @@ -35992,7 +35996,6 @@ def cudaGraphRemoveDependencies(graph, from_ : Optional[tuple[cudaGraphNode_t] | cyto[idx] = (to[idx])._pvt_ptr[0] elif len(to) == 1: cyto = (to[0])._pvt_ptr - edgeData = [] if edgeData is None else edgeData if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in edgeData): raise TypeError("Argument 'edgeData' is not instance of type (expected tuple[cyruntime.cudaGraphEdgeData,] or list[cyruntime.cudaGraphEdgeData,]") if len(edgeData) > 1: @@ -37957,7 +37960,9 @@ def cudaGraphAddNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | li """ cdef cyruntime.cudaGraphNodeParams* cynodeParams_ptr cdef cyruntime.cudaGraphEdgeData* cydependencyData = NULL + dependencyData = [] if dependencyData is None else dependencyData cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL + pDependencies = [] if pDependencies is None else pDependencies cdef cyruntime.cudaGraph_t cygraph cdef cudaGraphNode_t pGraphNode try: @@ -37969,7 +37974,6 @@ def cudaGraphAddNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | li else: pgraph = int(cudaGraph_t(graph)) cygraph = pgraph - pDependencies = [] if pDependencies is None else pDependencies if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies): raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]") if len(pDependencies) > 1: @@ -37981,7 +37985,6 @@ def cudaGraphAddNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | li cypDependencies[idx] = (pDependencies[idx])._pvt_ptr[0] elif len(pDependencies) == 1: cypDependencies = (pDependencies[0])._pvt_ptr - dependencyData = [] if dependencyData is None else dependencyData if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in dependencyData): raise TypeError("Argument 'dependencyData' is not instance of type (expected tuple[cyruntime.cudaGraphEdgeData,] or list[cyruntime.cudaGraphEdgeData,]") if len(dependencyData) > 1: @@ -38511,31 +38514,33 @@ def cudaLibraryLoadData(code, jitOptions : Optional[tuple[cudaJitOption] | list[ """ cdef _InputVoidPtrPtrHelper voidStarHelperlibraryOptionValues cdef void** cylibraryOptionValues_ptr + libraryOptionValues = [] if libraryOptionValues is None else libraryOptionValues + cdef vector[cyruntime.cudaLibraryOption] cylibraryOptions + libraryOptions = [] if libraryOptions is None else libraryOptions cdef _InputVoidPtrPtrHelper voidStarHelperjitOptionsValues cdef void** cyjitOptionsValues_ptr + jitOptionsValues = [] if jitOptionsValues is None else jitOptionsValues + cdef vector[cyruntime.cudaJitOption] cyjitOptions + jitOptions = [] if jitOptions is None else jitOptions cdef _HelperInputVoidPtrStruct cycodeHelper cdef void* cycode cdef cudaLibrary_t library try: library = cudaLibrary_t() cycode = _helper_input_void_ptr(code, &cycodeHelper) - jitOptions = [] if jitOptions is None else jitOptions if not all(isinstance(_x, (cudaJitOption)) for _x in jitOptions): raise TypeError("Argument 'jitOptions' is not instance of type (expected tuple[cyruntime.cudaJitOption] or list[cyruntime.cudaJitOption]") cyjitOptions = jitOptions - jitOptionsValues = [] if jitOptionsValues is None else jitOptionsValues pylist = [_HelperCudaJitOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(jitOptions, jitOptionsValues)] voidStarHelperjitOptionsValues = _InputVoidPtrPtrHelper(pylist) cyjitOptionsValues_ptr = voidStarHelperjitOptionsValues.cptr if numJitOptions > len(jitOptions): raise RuntimeError("List is too small: " + str(len(jitOptions)) + " < " + str(numJitOptions)) if numJitOptions > len(jitOptionsValues): raise RuntimeError("List is too small: " + str(len(jitOptionsValues)) + " < " + str(numJitOptions)) - libraryOptions = [] if libraryOptions is None else libraryOptions if not all(isinstance(_x, (cudaLibraryOption)) for _x in libraryOptions): raise TypeError("Argument 'libraryOptions' is not instance of type (expected tuple[cyruntime.cudaLibraryOption] or list[cyruntime.cudaLibraryOption]") cylibraryOptions = libraryOptions - libraryOptionValues = [] if libraryOptionValues is None else libraryOptionValues pylist = [_HelperCudaLibraryOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(libraryOptions, libraryOptionValues)] voidStarHelperlibraryOptionValues = _InputVoidPtrPtrHelper(pylist) cylibraryOptionValues_ptr = voidStarHelperlibraryOptionValues.cptr @@ -38622,27 +38627,29 @@ def cudaLibraryLoadFromFile(char* fileName, jitOptions : Optional[tuple[cudaJitO """ cdef _InputVoidPtrPtrHelper voidStarHelperlibraryOptionValues cdef void** cylibraryOptionValues_ptr + libraryOptionValues = [] if libraryOptionValues is None else libraryOptionValues + cdef vector[cyruntime.cudaLibraryOption] cylibraryOptions + libraryOptions = [] if libraryOptions is None else libraryOptions cdef _InputVoidPtrPtrHelper voidStarHelperjitOptionsValues cdef void** cyjitOptionsValues_ptr + jitOptionsValues = [] if jitOptionsValues is None else jitOptionsValues + cdef vector[cyruntime.cudaJitOption] cyjitOptions + jitOptions = [] if jitOptions is None else jitOptions cdef cudaLibrary_t library library = cudaLibrary_t() - jitOptions = [] if jitOptions is None else jitOptions if not all(isinstance(_x, (cudaJitOption)) for _x in jitOptions): raise TypeError("Argument 'jitOptions' is not instance of type (expected tuple[cyruntime.cudaJitOption] or list[cyruntime.cudaJitOption]") cyjitOptions = jitOptions - jitOptionsValues = [] if jitOptionsValues is None else jitOptionsValues pylist = [_HelperCudaJitOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(jitOptions, jitOptionsValues)] voidStarHelperjitOptionsValues = _InputVoidPtrPtrHelper(pylist) cyjitOptionsValues_ptr = voidStarHelperjitOptionsValues.cptr if numJitOptions > len(jitOptions): raise RuntimeError("List is too small: " + str(len(jitOptions)) + " < " + str(numJitOptions)) if numJitOptions > len(jitOptionsValues): raise RuntimeError("List is too small: " + str(len(jitOptionsValues)) + " < " + str(numJitOptions)) - libraryOptions = [] if libraryOptions is None else libraryOptions if not all(isinstance(_x, (cudaLibraryOption)) for _x in libraryOptions): raise TypeError("Argument 'libraryOptions' is not instance of type (expected tuple[cyruntime.cudaLibraryOption] or list[cyruntime.cudaLibraryOption]") cylibraryOptions = libraryOptions - libraryOptionValues = [] if libraryOptionValues is None else libraryOptionValues pylist = [_HelperCudaLibraryOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(libraryOptions, libraryOptionValues)] voidStarHelperlibraryOptionValues = _InputVoidPtrPtrHelper(pylist) cylibraryOptionValues_ptr = voidStarHelperlibraryOptionValues.cptr @@ -39466,10 +39473,10 @@ def cudaDevResourceGenerateDesc(resources : Optional[tuple[cudaDevResource] | li :py:obj:`~.cuDevResourceGenerateDesc`, :py:obj:`~.cudaDeviceGetDevResource`, :py:obj:`~.cudaExecutionCtxGetDevResource`, :py:obj:`~.cudaDevSmResourceSplit`, :py:obj:`~.cudaGreenCtxCreate` """ cdef cyruntime.cudaDevResource* cyresources = NULL + resources = [] if resources is None else resources cdef cudaDevResourceDesc_t phDesc try: phDesc = cudaDevResourceDesc_t() - resources = [] if resources is None else resources if not all(isinstance(_x, (cudaDevResource,)) for _x in resources): raise TypeError("Argument 'resources' is not instance of type (expected tuple[cyruntime.cudaDevResource,] or list[cyruntime.cudaDevResource,]") if len(resources) > 1: