Template Numerical Library version main:4e58ea6
TNL::Backend Namespace Reference

Internal namespace for CUDA/HIP backend support. More...

Classes

struct  LaunchConfiguration
 Holds the parameters necessary to launch a CUDA or HIP kernel, i.e. to schedule it for execution on some stream of some device (see the launch sketch after this list). More...
 
struct  SharedMemory
 
struct  SharedMemory< T, 16 >
 
struct  SharedMemory< T, 32 >
 
struct  SharedMemory< T, 64 >
 
struct  SharedMemory< T, 8 >
 
class  Stream
 
class  StreamPool
 
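The following is a minimal sketch of how LaunchConfiguration and the launch helpers declared below might be combined. The gridSize and blockSize members, the default constructor, and the include path are assumptions not documented on this page; only getNumberOfBlocks() and launchKernelSync() appear in the listings here.

#include <TNL/Backend.h>   // assumed umbrella header for this namespace

// A trivial __global__ kernel used only for illustration.
__global__ void scaleKernel( double* data, double factor, int n )
{
   const int i = blockIdx.x * blockDim.x + threadIdx.x;
   if( i < n )
      data[ i ] *= factor;
}

void scaleOnDevice( double* deviceData, double factor, int n )
{
   TNL::Backend::LaunchConfiguration config;
   config.blockSize = dim3( 256 );                                        // assumed member name
   config.gridSize = dim3( TNL::Backend::getNumberOfBlocks( n, 256 ) );   // helper declared below
   // Launch the kernel and block until it has finished.
   TNL::Backend::launchKernelSync( scaleKernel, config, deviceData, factor, n );
}
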

Typedefs

using error_t = cudaError_t
 
using stream_t = cudaStream_t
 

Enumerations

enum  { StreamDefault = cudaStreamDefault , StreamNonBlocking = cudaStreamNonBlocking }
 
enum  FuncCache { FuncCachePreferNone = cudaFuncCachePreferNone , FuncCachePreferShared = cudaFuncCachePreferShared , FuncCachePreferL1 = cudaFuncCachePreferL1 , FuncCachePreferEqual = cudaFuncCachePreferEqual }
 
enum  MemcpyKind {
  MemcpyHostToHost = cudaMemcpyHostToHost , MemcpyHostToDevice = cudaMemcpyHostToDevice , MemcpyDeviceToHost = cudaMemcpyDeviceToHost , MemcpyDeviceToDevice = cudaMemcpyDeviceToDevice ,
  MemcpyDefault = cudaMemcpyDefault
}
 

Functions

template<typename Element , typename FillBuffer , typename PushBuffer >
void bufferedTransfer (std::size_t size, FillBuffer &fill, PushBuffer &push)
 
template<typename Element , typename FillBuffer >
void bufferedTransferToDevice (Element *destination, std::size_t size, FillBuffer &fill)
 
template<typename Element , typename PushBuffer >
void bufferedTransferToHost (const Element *source, std::size_t size, PushBuffer &push)
 
void checkErrorCode (const char *file_name, int line, error_t error)
 
void deviceSynchronize ()
 
template<class T >
void funcSetCacheConfig (T *func, enum FuncCache cacheConfig)
 
int getArchitectureMajor (int deviceNum)
 
int getArchitectureMinor (int deviceNum)
 
int getClockRate (int deviceNum)
 
int getDevice ()
 Returns the ID of the active device.
 
int getDeviceCores (int deviceNum)
 
int getDeviceCoresPerMultiprocessors (int deviceNum)
 
int getDeviceCount ()
 Returns the number of devices available in the system.
 
int getDeviceMultiprocessors (int deviceNum)
 
std::string getDeviceName (int deviceNum)
 
bool getECCEnabled (int deviceNum)
 
std::size_t getFreeGlobalMemory ()
 
std::size_t getGlobalMemorySize (int deviceNum)
 
__device__ int getGlobalThreadIdx_x (const dim3 &gridIdx)
 
__device__ int getGlobalThreadIdx_y (const dim3 &gridIdx)
 
__device__ int getGlobalThreadIdx_z (const dim3 &gridIdx)
 
template<typename Index >
__device__ Index getInterleaving (const Index index)
 
constexpr int getMaxBlockXSize ()
 
constexpr int getMaxBlockYSize ()
 
constexpr int getMaxBlockZSize ()
 
constexpr std::size_t getMaxGridXSize ()
 
constexpr std::size_t getMaxGridYSize ()
 
constexpr std::size_t getMaxGridZSize ()
 
int getMemoryClockRate (int deviceNum)
 
int getNumberOfBlocks (const int threads, const int blockSize)
 
int getNumberOfGrids (const int blocks, const int gridSize)
 
constexpr int getNumberOfSharedMemoryBanks ()
 
int getRegistersPerMultiprocessor (int deviceNum)
 
template<typename T >
__device__ T * getSharedMemory ()
 
std::size_t getSharedMemoryPerBlock (int deviceNum)
 
constexpr std::size_t getTransferBufferSize ()
 
constexpr int getWarpSize ()
 
template<typename RawKernel , typename... KernelParameters>
void launchKernel (RawKernel kernel_function, LaunchConfiguration launch_configuration, KernelParameters &&... parameters)
 
template<typename RawKernel , typename... KernelParameters>
void launchKernelAsync (RawKernel kernel_function, LaunchConfiguration launch_configuration, KernelParameters &&... parameters)
 
template<typename RawKernel , typename... KernelParameters>
void launchKernelSync (RawKernel kernel_function, LaunchConfiguration launch_configuration, KernelParameters &&... parameters)
 
template<class T >
__device__ T ldg (const T &value)
 Loads data from global memory using the __ldg() intrinsic.
 
void memcpy (void *dst, const void *src, std::size_t sizeBytes, MemcpyKind kind)
 
std::ostream & operator<< (std::ostream &str, const dim3 &d)
 
void printThreadsSetup (const dim3 &blockSize, const dim3 &blocksCount, const dim3 &gridSize, const dim3 &gridsCount, std::ostream &str=std::cout)
 
void setDevice (int device)
 Sets the active device.
 
void setupGrid (const dim3 &blocksCount, const dim3 &gridsCount, const dim3 &gridIdx, dim3 &gridSize)
 
void setupThreads (const dim3 &blockSize, dim3 &blocksCount, dim3 &gridsCount, long long int xThreads, long long int yThreads=0, long long int zThreads=0)
 
stream_t streamCreateWithPriority (unsigned int flags, int priority)
 
void streamDestroy (stream_t stream)
 
void streamSynchronize (stream_t stream)
 

Detailed Description

Internal namespace for CUDA/HIP backend support.
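
Although the namespace is internal, the device-management helpers it declares can be combined as in the following minimal sketch (the include path is an assumption; all functions used are listed above, and error handling is omitted):

#include <iostream>
#include <TNL/Backend.h>   // assumed umbrella header for this namespace

int main()
{
   using namespace TNL::Backend;

   // Enumerate the devices visible to the CUDA/HIP runtime.
   const int count = getDeviceCount();
   for( int i = 0; i < count; i++ )
      std::cout << i << ": " << getDeviceName( i )
                << " (" << getDeviceMultiprocessors( i ) << " multiprocessors)\n";

   // Make device 0 active and verify the switch.
   if( count > 0 ) {
      setDevice( 0 );
      std::cout << "active device: " << getDevice() << "\n";
   }

   // Wait until all previously scheduled work on the device has finished.
   deviceSynchronize();
}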

Function Documentation

◆ bufferedTransferToDevice()

template<typename Element , typename FillBuffer >
void TNL::Backend::bufferedTransferToDevice( Element* destination, std::size_t size, FillBuffer& fill )

This function creates a buffer on the host, lets the fill handler fill it with data, and then transfers the buffered data to destination, which is a pointer to device memory.
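
A minimal sketch of how the function might be driven is shown below. The (buffer, bufferSize) parameter list of the fill handler is an assumption; the exact callable contract is not documented on this page.

#include <algorithm>
#include <cstddef>

// Sketch: stream `size` doubles from a host array into device memory
// through the host-side staging buffer managed by bufferedTransferToDevice.
void uploadDoubles( double* deviceArray, const double* hostData, std::size_t size )
{
   std::size_t offset = 0;
   // Assumed handler signature: fill `bufferSize` elements of the staging buffer.
   auto fill = [&]( double* buffer, std::size_t bufferSize )
   {
      std::copy( hostData + offset, hostData + offset + bufferSize, buffer );
      offset += bufferSize;
   };
   TNL::Backend::bufferedTransferToDevice( deviceArray, size, fill );
}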

◆ bufferedTransferToHost()

template<typename Element , typename PushBuffer >
void TNL::Backend::bufferedTransferToHost( const Element* source, std::size_t size, PushBuffer& push )

This function creates a buffer on the host, fills it with data transferred from source, which is a pointer to device memory, and then lets the push handler process the buffered data.
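
Analogously, a minimal sketch for the host-bound direction; the (buffer, bufferSize) parameter list of the push handler is again an assumption.

#include <algorithm>
#include <cstddef>

// Sketch: stream `size` doubles from device memory into a host array
// through the host-side staging buffer managed by bufferedTransferToHost.
void downloadDoubles( double* hostData, const double* deviceArray, std::size_t size )
{
   std::size_t offset = 0;
   // Assumed handler signature: consume `bufferSize` elements from the staging buffer.
   auto push = [&]( const double* buffer, std::size_t bufferSize )
   {
      std::copy( buffer, buffer + bufferSize, hostData + offset );
      offset += bufferSize;
   };
   TNL::Backend::bufferedTransferToHost( deviceArray, size, push );
}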