|
template<typename Element , typename FillBuffer , typename PushBuffer > |
void | bufferedTransfer (std::size_t size, FillBuffer &fill, PushBuffer &push) |
|
template<typename Element , typename FillBuffer > |
void | bufferedTransferToDevice (Element *destination, std::size_t size, FillBuffer &fill) |
|
template<typename Element , typename PushBuffer > |
void | bufferedTransferToHost (const Element *source, std::size_t size, PushBuffer &push) |
|
void | checkErrorCode (const char *file_name, int line, error_t error) |
|
void | deviceSynchronize () |
|
template<class T > |
void | funcSetCacheConfig (T *func, enum FuncCache cacheConfig) |
|
int | getArchitectureMajor (int deviceNum) |
|
int | getArchitectureMinor (int deviceNum) |
|
int | getClockRate (int deviceNum) |
|
int | getDevice () |
| Returns the ID of the active device.
|
|
int | getDeviceCores (int deviceNum) |
|
int | getDeviceCoresPerMultiprocessors (int deviceNum) |
|
int | getDeviceCount () |
| Returns the number of devices available in the system.
|
|
int | getDeviceMultiprocessors (int deviceNum) |
|
std::string | getDeviceName (int deviceNum) |
|
bool | getECCEnabled (int deviceNum) |
|
std::size_t | getFreeGlobalMemory () |
|
std::size_t | getGlobalMemorySize (int deviceNum) |
|
__device__ int | getGlobalThreadIdx_x (const dim3 &gridIdx) |
|
__device__ int | getGlobalThreadIdx_y (const dim3 &gridIdx) |
|
__device__ int | getGlobalThreadIdx_z (const dim3 &gridIdx) |
|
template<typename Index > |
__device__ Index | getInterleaving (const Index index) |
|
constexpr int | getMaxBlockXSize () |
|
constexpr int | getMaxBlockYSize () |
|
constexpr int | getMaxBlockZSize () |
|
constexpr std::size_t | getMaxGridXSize () |
|
constexpr std::size_t | getMaxGridYSize () |
|
constexpr std::size_t | getMaxGridZSize () |
|
int | getMemoryClockRate (int deviceNum) |
|
int | getNumberOfBlocks (const int threads, const int blockSize) |
|
int | getNumberOfGrids (const int blocks, const int gridSize) |
|
constexpr int | getNumberOfSharedMemoryBanks () |
|
int | getRegistersPerMultiprocessor (int deviceNum) |
|
template<typename T > |
__device__ T * | getSharedMemory () |
|
std::size_t | getSharedMemoryPerBlock (int deviceNum) |
|
constexpr std::size_t | getTransferBufferSize () |
|
constexpr int | getWarpSize () |
|
template<typename RawKernel , typename... KernelParameters> |
void | launchKernel (RawKernel kernel_function, LaunchConfiguration launch_configuration, KernelParameters &&... parameters) |
|
template<typename RawKernel , typename... KernelParameters> |
void | launchKernelAsync (RawKernel kernel_function, LaunchConfiguration launch_configuration, KernelParameters &&... parameters) |
|
template<typename RawKernel , typename... KernelParameters> |
void | launchKernelSync (RawKernel kernel_function, LaunchConfiguration launch_configuration, KernelParameters &&... parameters) |
|
template<class T > |
__device__ T | ldg (const T &value) |
| Loads data from global memory using the __ldg() intrinsic.
|
|
void | memcpy (void *dst, const void *src, std::size_t sizeBytes, MemcpyKind kind) |
|
std::ostream & | operator<< (std::ostream &str, const dim3 &d) |
|
void | printThreadsSetup (const dim3 &blockSize, const dim3 &blocksCount, const dim3 &gridSize, const dim3 &gridsCount, std::ostream &str=std::cout) |
|
void | setDevice (int device) |
| Sets the active device.
|
|
void | setupGrid (const dim3 &blocksCount, const dim3 &gridsCount, const dim3 &gridIdx, dim3 &gridSize) |
|
void | setupThreads (const dim3 &blockSize, dim3 &blocksCount, dim3 &gridsCount, long long int xThreads, long long int yThreads=0, long long int zThreads=0) |
|
stream_t | streamCreateWithPriority (unsigned int flags, int priority) |
|
void | streamDestroy (stream_t stream) |
|
void | streamSynchronize (stream_t stream) |
|
Internal namespace for CUDA/HIP backend support.