Template Numerical Library: TNL::Algorithms::SequentialFor< Device > Struct Template Reference

Wrapper around parallelFor which makes the loop run sequentially. More...

#include <TNL/Algorithms/SequentialFor.h>

Static Public Member Functions
template<typename Index, typename Function>
static void	exec (Index start, Index end, Function f)
	Static method for execution of the loop.

Detailed Description

template<typename Device = Devices::Sequential>
struct TNL::Algorithms::SequentialFor< Device >

Wrapper around parallelFor which makes the loop run sequentially.

It is helpful for debugging or for running plain sequential for-loops on GPUs.

Member Function Documentation

◆ exec()

template<typename Device = Devices::Sequential>

template<typename Index, typename Function>

static void TNL::Algorithms::SequentialFor< Device >::exec	(	Index	start,
		Index	end,
		Function	f )

inline static

Static method for execution of the loop.

Template Parameters

Index	defines the type of indexes over which the loop iterates.
Function	is the type of function to be called in each iteration.

Parameters

start	is the first index of the half-open interval [start, end) over which the for-loop iterates.
end	is the exclusive upper bound of the half-open interval [start, end) over which the for-loop iterates.
f	is the function to be called in each iteration.

Example:

#include <iostream>

#include <cstdlib>

#include <TNL/Containers/Vector.h>

#include <TNL/Algorithms/parallelFor.h>

#include <TNL/Algorithms/SequentialFor.h>

using namespace TNL;

using namespace TNL::Containers;

template< typename Device >

void

printVector()

{

const int size( 60 );

TNL::Containers::Vector< float, Device > v( size, 1.0 );

auto view = v.getView();

auto print = [ = ] __cuda_callable__( int i ) mutable

{

if( i % 5 == 0 )

printf( "v[ %d ] = %f \n", i, view[ i ] ); // we use printf because of compatibility with GPU kernels

};

std::cout << "Printing vector using parallel for: " << std::endl;

Algorithms::parallelFor< Device >( 0, v.getSize(), print );

std::cout << "Printing vector using sequential for: " << std::endl;

Algorithms::SequentialFor< Device >::exec( 0, v.getSize(), print );

}

/**
 * Entry point of the example.
 *
 * Runs printVector on the host and, when the file is compiled with
 * __CUDACC__ defined, on the CUDA GPU as well. The command-line arguments
 * are not used, so the parameterless form of main is declared.
 *
 * \return EXIT_SUCCESS
 */
int
main()
{
   std::cout << "Example on the host:" << std::endl;
   printVector< TNL::Devices::Host >();

#ifdef __CUDACC__
   // This part is compiled only when __CUDACC__ is defined.
   std::cout << "Example on CUDA GPU:" << std::endl;
   printVector< TNL::Devices::Cuda >();
#endif

   return EXIT_SUCCESS;
}

__cuda_callable__
#define __cuda_callable__
Definition Macros.h:49

std::cout

TNL::Containers::Vector
Vector extends Array with algebraic operations.
Definition Vector.h:36

std::endl
T endl(T... args)

TNL::Algorithms::parallelFor
std::enable_if_t< std::is_integral_v< Begin > &&std::is_integral_v< End > > parallelFor(const Begin &begin, const End &end, typename Device::LaunchConfiguration launch_config, Function f, FunctionArgs... args)
Parallel for-loop function for 1D range specified with integral values.
Definition parallelFor.h:41

TNL::Containers
Namespace for TNL containers.
Definition Array.h:17

TNL
The main TNL namespace.
Definition AtomicOperations.h:9

TNL::Algorithms::SequentialFor::exec
static void exec(Index start, Index end, Function f)
Static method for execution of the loop.
Definition SequentialFor.h:37

Output:

Example on the host:

Printing vector using parallel for:

v[ 0 ] = 1.000000

v[ 5 ] = 1.000000

v[ 10 ] = 1.000000

v[ 15 ] = 1.000000

v[ 20 ] = 1.000000

v[ 25 ] = 1.000000

v[ 30 ] = 1.000000

v[ 35 ] = 1.000000

v[ 40 ] = 1.000000

v[ 45 ] = 1.000000

v[ 50 ] = 1.000000

v[ 55 ] = 1.000000

Printing vector using sequential for:

v[ 0 ] = 1.000000

v[ 5 ] = 1.000000

v[ 10 ] = 1.000000

v[ 15 ] = 1.000000

v[ 20 ] = 1.000000

v[ 25 ] = 1.000000

v[ 30 ] = 1.000000

v[ 35 ] = 1.000000

v[ 40 ] = 1.000000

v[ 45 ] = 1.000000

v[ 50 ] = 1.000000

v[ 55 ] = 1.000000

Example on CUDA GPU:

Printing vector using parallel for:

v[ 35 ] = 1.000000

v[ 40 ] = 1.000000

v[ 45 ] = 1.000000

v[ 50 ] = 1.000000

v[ 55 ] = 1.000000

v[ 0 ] = 1.000000

v[ 5 ] = 1.000000

v[ 10 ] = 1.000000

v[ 15 ] = 1.000000

v[ 20 ] = 1.000000

v[ 25 ] = 1.000000

v[ 30 ] = 1.000000

Printing vector using sequential for:

v[ 0 ] = 1.000000

v[ 5 ] = 1.000000

v[ 10 ] = 1.000000

v[ 15 ] = 1.000000

v[ 20 ] = 1.000000

v[ 25 ] = 1.000000

v[ 30 ] = 1.000000

v[ 35 ] = 1.000000

v[ 40 ] = 1.000000

v[ 45 ] = 1.000000

v[ 50 ] = 1.000000

v[ 55 ] = 1.000000

The documentation for this struct was generated from the following file:

src/TNL/Algorithms/SequentialFor.h

Static Public Member Functions

Detailed Description

Member Function Documentation

◆ exec()