Template Numerical Library: TNL::Backend Namespace Reference

Internal namespace for CUDA/HIP backend support. More...

Classes
struct	LaunchConfiguration
	Holds the parameters necessary to launch a CUDA or HIP kernel (i.e. schedule it for execution on some stream of some device). More...

struct	SharedMemory

struct	SharedMemory< T, 16 >

struct	SharedMemory< T, 32 >

struct	SharedMemory< T, 64 >

struct	SharedMemory< T, 8 >

class	Stream

class	StreamPool

Typedefs
using	error_t = cudaError_t

using	stream_t = cudaStream_t

Enumerations
enum	{ StreamDefault = cudaStreamDefault , StreamNonBlocking = cudaStreamNonBlocking }

enum	FuncCache { FuncCachePreferNone = cudaFuncCachePreferNone , FuncCachePreferShared = cudaFuncCachePreferShared , FuncCachePreferL1 = cudaFuncCachePreferL1 , FuncCachePreferEqual = cudaFuncCachePreferEqual }

enum	MemcpyKind { MemcpyHostToHost = cudaMemcpyHostToHost , MemcpyHostToDevice = cudaMemcpyHostToDevice , MemcpyDeviceToHost = cudaMemcpyDeviceToHost , MemcpyDeviceToDevice = cudaMemcpyDeviceToDevice , MemcpyDefault = cudaMemcpyDefault }

Functions
template<typename Element, typename FillBuffer, typename PushBuffer>
void	bufferedTransfer (std::size_t size, FillBuffer &fill, PushBuffer &push)

template<typename Element, typename FillBuffer>
void	bufferedTransferToDevice (Element *destination, std::size_t size, FillBuffer &fill)

template<typename Element, typename PushBuffer>
void	bufferedTransferToHost (const Element *source, std::size_t size, PushBuffer &push)

void	checkErrorCode (const char *file_name, int line, error_t error)

void	deviceSynchronize ()

template<class T>
void	funcSetCacheConfig (T *func, enum FuncCache cacheConfig)

int	getArchitectureMajor (int deviceNum)

int	getArchitectureMinor (int deviceNum)

int	getClockRate (int deviceNum)

int	getDevice ()
	Returns the ID of the active device.

int	getDeviceCores (int deviceNum)

int	getDeviceCoresPerMultiprocessors (int deviceNum)

int	getDeviceCount ()
	Returns the number of devices available in the system.

int	getDeviceMultiprocessors (int deviceNum)

std::string	getDeviceName (int deviceNum)

bool	getECCEnabled (int deviceNum)

std::size_t	getFreeGlobalMemory ()

std::size_t	getGlobalMemorySize (int deviceNum)

__device__ int	getGlobalThreadIdx_x (const dim3 &gridIdx)

__device__ int	getGlobalThreadIdx_y (const dim3 &gridIdx)

__device__ int	getGlobalThreadIdx_z (const dim3 &gridIdx)

template<typename Index>
__device__ Index	getInterleaving (const Index index)

constexpr int	getMaxBlockXSize ()

constexpr int	getMaxBlockYSize ()

constexpr int	getMaxBlockZSize ()

constexpr std::size_t	getMaxGridXSize ()

constexpr std::size_t	getMaxGridYSize ()

constexpr std::size_t	getMaxGridZSize ()

int	getMemoryClockRate (int deviceNum)

int	getNumberOfBlocks (const int threads, const int blockSize)

int	getNumberOfGrids (const int blocks, const int gridSize)

constexpr int	getNumberOfSharedMemoryBanks ()

int	getRegistersPerMultiprocessor (int deviceNum)

template<typename T>
__device__ T *	getSharedMemory ()

std::size_t	getSharedMemoryPerBlock (int deviceNum)

constexpr std::size_t	getTransferBufferSize ()

constexpr int	getWarpSize ()

template<typename RawKernel, typename... KernelParameters>
void	launchKernel (RawKernel kernel_function, LaunchConfiguration launch_configuration, KernelParameters &&... parameters)

template<typename RawKernel, typename... KernelParameters>
void	launchKernelAsync (RawKernel kernel_function, LaunchConfiguration launch_configuration, KernelParameters &&... parameters)

template<typename RawKernel, typename... KernelParameters>
void	launchKernelSync (RawKernel kernel_function, LaunchConfiguration launch_configuration, KernelParameters &&... parameters)

template<class T>
__device__ T	ldg (const T &value)
	Loads data from global memory using the `__ldg()` intrinsic.

void	memcpy (void *dst, const void *src, std::size_t sizeBytes, MemcpyKind kind)

std::ostream &	operator<< (std::ostream &str, const dim3 &d)

void	printThreadsSetup (const dim3 &blockSize, const dim3 &blocksCount, const dim3 &gridSize, const dim3 &gridsCount, std::ostream &str=std::cout)

void	setDevice (int device)
	Sets the active device.

void	setupGrid (const dim3 &blocksCount, const dim3 &gridsCount, const dim3 &gridIdx, dim3 &gridSize)

void	setupThreads (const dim3 &blockSize, dim3 &blocksCount, dim3 &gridsCount, long long int xThreads, long long int yThreads=0, long long int zThreads=0)

stream_t	streamCreateWithPriority (unsigned int flags, int priority)

void	streamDestroy (stream_t stream)

void	streamSynchronize (stream_t stream)

Detailed Description

Internal namespace for CUDA/HIP backend support.

Function Documentation

◆ bufferedTransferToDevice()

template<typename Element, typename FillBuffer>

void TNL::Backend::bufferedTransferToDevice	(	Element *	destination,
		std::size_t	size,
		FillBuffer &	fill )

This function creates a buffer on the host, lets the fill handler fill it with data, and then transfers the data from the buffer to the destination, which is a pointer to device memory.

◆ bufferedTransferToHost()

template<typename Element, typename PushBuffer>

void TNL::Backend::bufferedTransferToHost	(	const Element *	source,
		std::size_t	size,
		PushBuffer &	push )

This function creates a buffer on the host, fills it with data transferred from the source, which is a pointer to device memory, and then lets the push handler process the data in the buffer.

Classes

Typedefs

Enumerations

Functions

Detailed Description

Function Documentation

◆ bufferedTransferToDevice()

◆ bufferedTransferToHost()